You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/11 14:19:02 UTC

svn commit: r1455131 [2/7] - in /stanbol/branches/stanbol-solr4: commons/ commons/frameworkfragment/ commons/solr/core/ commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/ commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/uti...

Modified: stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/labeltokenizer-smartcn/src/main/resources/config/org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.lucene.LuceneLabelTokenizer-smartcn.config
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/labeltokenizer-smartcn/src/main/resources/config/org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.lucene.LuceneLabelTokenizer-smartcn.config?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/labeltokenizer-smartcn/src/main/resources/config/org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.lucene.LuceneLabelTokenizer-smartcn.config (original)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/labeltokenizer-smartcn/src/main/resources/config/org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.lucene.LuceneLabelTokenizer-smartcn.config Mon Mar 11 13:18:59 2013
@@ -1,5 +1,5 @@
 enhancer.engines.entitylinking.labeltokenizer.languages=["zh"]
 enhancer.engine.linking.labeltokenizer.lucene.charFilterFactory=""
-enhancer.engine.linking.labeltokenizer.lucene.tokenizerFactory="org.apache.solr.analysis.SmartChineseSentenceTokenizerFactory"
-enhancer.engine.linking.labeltokenizer.lucene.tokenFilterFactory=["org.apache.solr.analysis.SmartChineseWordTokenFilterFactory"]
+enhancer.engine.linking.labeltokenizer.lucene.tokenizerFactory="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"
+enhancer.engine.linking.labeltokenizer.lucene.tokenFilterFactory=["org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"]
 service.ranking=I"100"

Modified: stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/entitylinking/pom.xml Mon Mar 11 13:18:59 2013
@@ -56,6 +56,8 @@
     <!-- Chinese Label Tokenizer -->
     <module>labeltokenizer-smartcn</module> <!-- config for the lucene label tokenizer -->
     <module>labeltokenizer-paoding</module> <!-- implementation based on paoding -->
+    <!-- Japanese -->
+    <module>labeltokenizer-kuromoji</module> <!-- configuration based on kuromoji-->
   </modules>
 
   <profiles>

Modified: stanbol/branches/stanbol-solr4/enhancement-engines/entitytagging/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/entitytagging/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/entitytagging/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/entitytagging/pom.xml Mon Mar 11 13:18:59 2013
@@ -151,7 +151,7 @@
      <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.yard.solr</artifactId>
-      <version>0.11.0</version>
+      <version>0.12.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
      <dependency>

Propchange: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Mar 11 13:18:59 2013
@@ -0,0 +1,7 @@
+.settings
+
+.classpath
+
+.project
+
+target

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/pom.xml?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/pom.xml (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/pom.xml Mon Mar 11 13:18:59 2013
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  You under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>apache-stanbol-enhancement-engines</artifactId>
+    <version>0.10.1-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.kuromoji.nlp</artifactId>
+  <version>0.10.1-SNAPSHOT</version>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancement Engine : Kuromoji NLP </name>
+  <description>NLP processing based on the Lucene Kuromoji module</description>
+
+  <inceptionYear>2013</inceptionYear>
+
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/kuromoji-nlp
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/kuromoji-nlp
+    </developerConnection>
+    <url>http://stanbol.apache.org/</url>
+  </scm>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Import-Package>
+              org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,0.12)",
+              org.apache.stanbol.enhancer.servicesapi.impl; provide:=true; version="[0.10,0.12)",
+              *
+            </Import-Package>
+            <Private-Package>
+              org.apache.stanbol.enhancer.engines.kuromoji.impl
+            </Private-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 License -->
+            <exclude>src/license/THIRD-PARTY.properties</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+      <version>0.10.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.solr.core</artifactId>
+      <version>0.12.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-kuromoji</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+      <version>0.10.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <!-- for tests -->
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+      <version>0.11.0-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <version>0.11.0-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <scope>test</scope>
+    </dependency>    
+  </dependencies>
+
+</project>

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java Mon Mar 11 13:18:59 2013
@@ -0,0 +1,526 @@
+package org.apache.stanbol.enhancer.engines.kuromoji;
+
+import org.apache.lucene.analysis.ja.util.ToStringUtil;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+
+/**
+ * Defines mappings of the String tags used by Kuromoji to the vocabulary used
+ * by the Stanbol NLP processing module
+ * @author Rupert Westenthaler
+ */
+public class Constants {
+
+    
+    /**
+     * set of part of speech tags as defined in the {@link ToStringUtil} class.
+     * Descriptions are taken from the 
+     * <a href="http://lucene-gosen.googlecode.com/svn/trunk/example/stoptags_ja.txt">
+     * Gosen Pos Tag Documentation</a>, as the Tag Set used by Kuromoji 
+     * exactly matches the one used by Gosen.
+     */
+    public static final TagSet<PosTag> POS_TAG_SET = new TagSet<PosTag>("Kuromoji Japanese", "ja");
+    /**
+     * NerTags for the part of speech tags that denote Named Entities (persons, organisations and places)
+     */
+    public static final TagSet<NerTag> NER_TAG_SET = new TagSet<NerTag>("Kuromoji Japanese", "ja");
+    
+    static {
+         /**
+         *  noun: unclassified nouns
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞",LexicalCategory.Noun));
+        /**
+         *  noun-common: Common nouns or nouns where the sub-classification is undefined
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-一般",Pos.CommonNoun));
+        /**
+         *  noun-proper: Proper nouns where the sub-classification is undefined 
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞",Pos.ProperNoun));
+         /**
+         *  noun-proper-misc: miscellaneous proper nouns
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-一般",Pos.ProperNoun));
+         /**
+         *  noun-proper-person: Personal names where the sub-classification is undefined
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-人名",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-人名",OntologicalClasses.DBPEDIA_PERSON));
+         /**
+         *  noun-proper-person-misc: names that cannot be divided into surname and 
+         *  given name; foreign names; names where the surname or given name is unknown.
+         *  e.g. お市の方
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-人名-一般",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-人名-一般",OntologicalClasses.DBPEDIA_PERSON));
+         /**
+         *  noun-proper-person-surname: Mainly Japanese surnames.
+         *  e.g. 山田
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-人名-姓",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-人名-姓",OntologicalClasses.DBPEDIA_PERSON));
+         /**
+         *  noun-proper-person-given_name: Mainly Japanese given names.
+         *  e.g. 太郎
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-人名-名",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-人名-名",OntologicalClasses.DBPEDIA_PERSON));
+         /**
+         *  noun-proper-organization: Names representing organizations.
+         *  e.g. 通産省, NHK
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-組織",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-組織",OntologicalClasses.DBPEDIA_ORGANISATION));
+         /**
+         *  noun-proper-place: Place names where the sub-classification is undefined
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-地域",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-地域",OntologicalClasses.DBPEDIA_PLACE));
+         /**
+         *  noun-proper-place-misc: Place names excluding countries.
+         *  e.g. アジア, バルセロナ, 京都
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-地域-一般",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-地域-一般",OntologicalClasses.DBPEDIA_PLACE));
+         /**
+         *  noun-proper-place-country: Country names. 
+         *  e.g. 日本, オーストラリア
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-固有名詞-地域-国",Pos.ProperNoun));
+        NER_TAG_SET.addTag(new NerTag("名詞-固有名詞-地域-国",OntologicalClasses.DBPEDIA_PLACE));
+         /**
+         *  noun-pronoun: Pronouns where the sub-classification is undefined
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-代名詞",Pos.Pronoun));
+         /**
+         *  noun-pronoun-misc: miscellaneous pronouns: 
+         *  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-代名詞-一般",Pos.Pronoun));
+         /**
+         *  noun-pronoun-contraction: Spoken language contraction made by combining a 
+         *  pronoun and the particle 'wa'.
+         *  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ 
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-代名詞-縮約",Pos.Pronoun,Pos.Participle));
+         /**
+         *  noun-adverbial: Temporal nouns such as names of days or months that behave 
+         *  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+         *  e.g. 金曜, 一月, 午後, 少量
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-副詞可能",LexicalCategory.Adverb,Pos.CommonNoun));
+         /**
+         *  noun-verbal: Nouns that take arguments with case and can appear followed by 
+         *  'suru' and related verbs (する, できる, なさる, くださる)
+         *  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-サ変接続",Pos.VerbalNoun));
+         /**
+         *  noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+         *  e.g. 健康, 安易, 駄目, だめ
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-形容動詞語幹",LexicalCategory.Adjective,Pos.CommonNoun));
+         /**
+         *  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+         *  e.g. 0, 1, 2, 何, 数, 幾
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-数",Pos.CardinalNumber));
+         /**
+         *  noun-affix: noun affixes where the sub-classification is undefined
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-非自立",LexicalCategory.Noun));
+         /**
+         *  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that 
+         *  attach to the base form of inflectional words, words that cannot be classified 
+         *  into any of the other categories below. This category includes indefinite nouns.
+         *  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, 
+         *       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, 
+         *       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
+         *       わり, 割り, 割, ん-口語/, もん-口語/
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-非自立-一般",LexicalCategory.Noun));
+         /**
+         *  noun-affix-adverbial: noun affixes that can behave as adverbs.
+         *  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, 
+         *       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, 
+         *       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, 
+         *       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, 
+         *       儘, 侭, みぎり, 矢先
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-非自立-副詞可能",LexicalCategory.Noun,LexicalCategory.Adverb));
+         /**
+         *  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars 
+         *  with the stem よう(だ) ("you(da)").
+         *  e.g.  よう, やう, 様 (よう)
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-非自立-助動詞語幹",Pos.VerbalNoun,Pos.AuxiliaryVerb));
+         /**  
+         *  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+         *  connection form な (aux "da").
+         *  e.g. みたい, ふう
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-非自立-形容動詞語幹",LexicalCategory.Noun,LexicalCategory.Adjective));
+         /**
+         *  noun-special: special nouns where the sub-classification is undefined.
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-特殊",LexicalCategory.Noun));
+         /**
+         *  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is 
+         *  treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base 
+         *  form of inflectional words.
+         *  e.g. そう
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-特殊-助動詞語幹",LexicalCategory.Noun));
+         /**
+         *  noun-suffix: noun suffixes where the sub-classification is undefined.
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾",LexicalCategory.Noun));
+         /**
+         *  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
+         *  to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+         *  any of the other categories below. In general, this category is more inclusive than 
+         *  接尾語 ("suffix") and is usually the last element in a compound noun.
+         *  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
+         *       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-一般",LexicalCategory.Noun));
+         /**
+         *  noun-suffix-person: Suffixes that form nouns and attach to person names more often
+         *  than other nouns.
+         *  e.g. 君, 様, 著
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-人名",LexicalCategory.Noun));
+        NER_TAG_SET.addTag(new NerTag("名詞-接尾-人名",OntologicalClasses.DBPEDIA_PERSON));
+         /**
+         *  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
+         *  than other nouns.
+         *  e.g. 町, 市, 県
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-地域",LexicalCategory.Noun));
+        NER_TAG_SET.addTag(new NerTag("名詞-接尾-地域",OntologicalClasses.DBPEDIA_PLACE));
+         /**
+         *  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
+         *  can appear before スル ("suru").
+         *  e.g. 化, 視, 分け, 入り, 落ち, 買い
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-サ変接続",Pos.VerbalNoun));
+         /**
+         *  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, 
+         *  is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the 
+         *  conjunctive form of inflectional words.
+         *  e.g. そう
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-助動詞語幹",Pos.VerbalNoun,Pos.AuxiliaryVerb));
+         /**
+         *  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
+         *  form of inflectional words and appear before the copula だ ("da").
+         *  e.g. 的, げ, がち
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-形容動詞語幹",LexicalCategory.Noun,LexicalCategory.Adjective));
+         /**
+         *  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+         *  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-副詞可能",LexicalCategory.Noun,LexicalCategory.Adverb));
+         /**
+         *  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
+         *  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach 
+         *  to numbers.
+         *  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-助数詞",Pos.UnitNoun));
+         /**
+         *  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+         *  e.g. (楽し) さ, (考え) 方
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接尾-特殊",Pos.CommonNoun));
+         /**
+         *  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
+         *  together.
+         *  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-接続詞的",LexicalCategory.Conjuction,Pos.CommonNoun));
+         /**
+         *  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are 
+         *  semantically verb-like.
+         *  e.g. ごらん, ご覧, 御覧, 頂戴
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-動詞非自立的",Pos.VerbalNoun,Pos.AuxiliaryVerb));
+         /**
+         *  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
+         *  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") 
+         *  is いわく ("iwaku").
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-引用文字列",LexicalCategory.Noun));
+         /**
+         *  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+         *  behave like an adjective.
+         *  e.g. 申し訳, 仕方, とんでも, 違い
+         */
+        POS_TAG_SET.addTag(new PosTag("名詞-ナイ形容詞語幹",LexicalCategory.Noun,LexicalCategory.Adjective));
+         /**
+         *  prefix: unclassified prefixes
+         */
+        POS_TAG_SET.addTag(new PosTag("接頭詞"));
+         /**
+         *  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
+         *  excluding numerical expressions.
+         *  e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
+         */
+        POS_TAG_SET.addTag(new PosTag("接頭詞-名詞接続",LexicalCategory.Noun));
+         /**
+         *  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+         *  in conjunctive form followed by なる/なさる/くださる.
+         *  e.g. お (読みなさい), お (座り)
+         */
+        POS_TAG_SET.addTag(new PosTag("接頭詞-動詞接続",LexicalCategory.Verb));
+         /**
+         *  prefix-adjectival: Prefixes that attach to adjectives.
+         *  e.g. お (寒いですねえ), バカ (でかい)
+         */
+        POS_TAG_SET.addTag(new PosTag("接頭詞-形容詞接続",LexicalCategory.Adjective));
+         /**
+         *  prefix-numerical: Prefixes that attach to numerical expressions.
+         *  e.g. 約, およそ, 毎時
+         */
+        POS_TAG_SET.addTag(new PosTag("接頭詞-数接続",Pos.Numeral));
+         /**
+         *  verb: unclassified verbs
+         */
+        POS_TAG_SET.addTag(new PosTag("動詞",LexicalCategory.Verb));
+         /**
+         *  verb-main:
+         */
+        POS_TAG_SET.addTag(new PosTag("動詞-自立",Pos.MainVerb));
+         /**
+         *  verb-auxiliary:
+         */
+        POS_TAG_SET.addTag(new PosTag("動詞-非自立",Pos.AuxiliaryVerb));
+         /**
+         *  verb-suffix:
+         */
+        POS_TAG_SET.addTag(new PosTag("動詞-接尾",LexicalCategory.Verb));
+         /**
+         *  adjective: unclassified adjectives
+         */
+        POS_TAG_SET.addTag(new PosTag("形容詞",LexicalCategory.Adjective));
+         /**
+         *  adjective-main:
+         */
+        POS_TAG_SET.addTag(new PosTag("形容詞-自立",LexicalCategory.Adjective));
+         /**
+         *  adjective-auxiliary:
+         */
+        POS_TAG_SET.addTag(new PosTag("形容詞-非自立",LexicalCategory.Adjective));
+         /**
+         *  adjective-suffix:
+         */
+        POS_TAG_SET.addTag(new PosTag("形容詞-接尾",LexicalCategory.Adjective));
+         /**
+         *  adverb: unclassified adverbs
+         */
+        POS_TAG_SET.addTag(new PosTag("副詞",LexicalCategory.Adverb));
+         /**
+         *  adverb-misc: Words that can be segmented into one unit and where adnominal 
+         *  modification is not possible.
+         *  e.g. あいかわらず, 多分
+         */
+        POS_TAG_SET.addTag(new PosTag("副詞-一般",LexicalCategory.Adverb));
+         /**
+         *  adverb-particle_conjunction: Adverbs that can be followed by の, は, に, 
+         *  な, する, だ, etc.
+         *  e.g. こんなに, そんなに, あんなに, なにか, なんでも
+         */
+        POS_TAG_SET.addTag(new PosTag("副詞-助詞類接続",LexicalCategory.Adverb,Pos.CoordinationParticle));
+         /**
+         *  adnominal: Words that only have noun-modifying forms.
+         *  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, 
+         *       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, 
+         *       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
+         */
+        POS_TAG_SET.addTag(new PosTag("連体詞",LexicalCategory.Adjective));
+         /**
+         *  conjunction: Conjunctions that can occur independently.
+         *  e.g. が, けれども, そして, じゃあ, それどころか
+         */
+        POS_TAG_SET.addTag(new PosTag("接続詞",LexicalCategory.Conjuction));
+         /**
+         *  particle: unclassified particles.
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞",Pos.Particle));
+         /**
+         *  particle-case: case particles where the subclassification is undefined.
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-格助詞",Pos.Particle));
+         /**
+         *  particle-case-misc: Case particles.
+         *  e.g. から, が, で, と, に, へ, より, を, の, にて
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-格助詞-一般",Pos.Particle));
+         /**
+         *  particle-case-quote: the "to" that appears after nouns, a person’s speech, 
+         *  quotation marks, expressions of decisions from a meeting, reasons, judgements,
+         *  conjectures, etc.
+         *  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-格助詞-引用",Pos.Particle));
+         /**
+         *  particle-case-compound: Compounds of particles and verbs that mainly behave 
+         *  like case particles.
+         *  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
+         *       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, 
+         *       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, 
+         *       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, 
+         *       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
+         *       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, 
+         *       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
+         *       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-格助詞-連語",Pos.Particle));
+         /**
+         *  particle-conjunctive:
+         *  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, 
+         *       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, 
+         *       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-接続助詞",Pos.ConjunctionPhrase,Pos.Particle));
+         /**
+         *  particle-dependency:
+         *  e.g. こそ, さえ, しか, すら, は, も, ぞ
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-係助詞",Pos.Particle));
+         /**
+         *  particle-adverbial:
+         *  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, 
+         *       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
+         *       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, 
+         *       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
+         *       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-副助詞",Pos.AdverbialParticiple));
+         /**
+         *  particle-interjective: particles with interjective grammatical roles.
+         *  e.g. (松島) や
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-間投助詞",Pos.Interjection,Pos.Particle));
+         /**
+         *  particle-coordinate:
+         *  e.g. と, たり, だの, だり, とか, なり, や, やら
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-並立助詞",Pos.CoordinationParticle));
+         /**
+         *  particle-final:
+         *  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, 
+         *       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-終助詞",Pos.Particle));
+         /**
+         *  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
+         *  adverbial, conjunctive, or sentence final. For example:
+         *       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
+         *       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
+         *           「(祈りが届いたせい) か (, 試験に合格した.)」
+         *       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
+         *  e.g. か
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-副助詞/並立助詞/終助詞",Pos.AdverbialParticiple,Pos.ConjunctionPhrase));
+         /**
+         *  particle-adnominalizer: The "no" that attaches to nouns and modifies 
+         *  non-inflectional words.
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-連体化",Pos.Particle));
+         /**
+         *  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
+         *  that are giongo, giseigo, or gitaigo.
+         *  e.g. に, と
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-副詞化",Pos.Particle));
+         /**
+         *  particle-special: A particle that does not fit into one of the above classifications. 
+         *  This includes particles that are used in Tanka, Haiku, and other poetry.
+         *  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
+         */
+        POS_TAG_SET.addTag(new PosTag("助詞-特殊",Pos.Participle));
+         /**
+         *  auxiliary-verb:
+         */
+        POS_TAG_SET.addTag(new PosTag("助動詞",Pos.AuxiliaryVerb));
+         /**
+         *  interjection: Greetings and other exclamations.
+         *  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, 
+         *       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
+         */
+        POS_TAG_SET.addTag(new PosTag("感動詞",Pos.Interjection));
+         /**
+         *  symbol: unclassified Symbols.
+         */
+        POS_TAG_SET.addTag(new PosTag("記号",Pos.Symbol));
+         /**
+         *  symbol-misc: A general symbol not in one of the categories below.
+         *  e.g. [○◎@$〒→+]
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-一般",Pos.Symbol));
+        /**
+        *  symbol-period: Periods and full stops.
+        *  e.g. [..。]
+        */
+       POS_TAG_SET.addTag(new PosTag("記号-句点",Pos.Point));
+         /**
+         *  symbol-comma: Commas
+         *  e.g. [,、]
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-読点",Pos.Comma));
+         /**
+         *  symbol-space: Full-width whitespace.
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-空白",Pos.Symbol));
+         /**
+         *  symbol-open_bracket:
+         *  e.g. [({‘“『【]
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-括弧開",Pos.OpenBracket));
+         /**
+         *  symbol-close_bracket:
+         *  e.g. [)}’”』」】]
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-括弧閉",Pos.CloseBracket));
+         /**
+         *  symbol-alphabetic:
+         */
+        POS_TAG_SET.addTag(new PosTag("記号-アルファベット",Pos.Symbol));
+         /**
+         *  other: unclassified other
+         */
+        POS_TAG_SET.addTag(new PosTag("その他",Pos.Foreign));
+         /**
+         *  other-interjection: Words that are hard to classify as noun-suffixes or 
+         *  sentence-final particles.
+         *  e.g. (だ)ァ
+         */
+        POS_TAG_SET.addTag(new PosTag("その他-間投",LexicalCategory.Noun));
+         /**
+         *  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+         *  e.g. あの, うんと, えと
+         */
+        POS_TAG_SET.addTag(new PosTag("フィラー"));
+         /**
+         * * * * *
+         *  non-verbal: non-verbal sound.
+         */
+        POS_TAG_SET.addTag(new PosTag("非言語音"));
+         /**
+         *  fragment:
+         */
+        POS_TAG_SET.addTag(new PosTag("語断片"));
+         /**
+         * * * * *
+         *  unknown: unknown part of speech.
+         */
+        POS_TAG_SET.addTag(new PosTag("未知語",Pos.Foreign));
+    }
+}

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java Mon Mar 11 13:18:59 2013
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2012 Sebastian Schaffert
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.stanbol.enhancer.engines.kuromoji.impl;
+
+import static org.apache.stanbol.enhancer.engines.kuromoji.Constants.NER_TAG_SET;
+import static org.apache.stanbol.enhancer.engines.kuromoji.Constants.POS_TAG_SET;
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.MORPHO_ANNOTATION;
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
+import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
+import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.initAnalysedText;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.io.input.CharSequenceReader;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ja.JapaneseBaseFormFilterFactory;
+import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilterFactory;
+import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilterFactory;
+import org.apache.lucene.analysis.ja.JapaneseTokenizerFactory;
+import org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute;
+import org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute;
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.Version;
+import org.apache.sling.installer.core.impl.OsgiInstallerImpl;
+import org.apache.stanbol.commons.solr.utils.StanbolResourceLoader;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
+import org.apache.stanbol.enhancer.nlp.model.Sentence;
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tokenizing, sentence detection, POS tagging and named entity extraction
+ * for Japanese based on the Lucene Kuromoji analysers.
+ * 
+ * @author Rupert Westenthaler
+ */
+
+@Component(immediate = true, metatype = true, 
+    policy = ConfigurationPolicy.OPTIONAL) //create a default instance with the default configuration
+@Service
+@Properties(value={
+        @Property(name= EnhancementEngine.PROPERTY_NAME,value="kuromoji-token"),
+        @Property(name=Constants.SERVICE_RANKING,intValue=0) //give the default instance a ranking < 0
+})
+public class KuromojiNlpEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements ServiceProperties {
+
+    /** Lucene version passed as 'luceneMatchVersion' to all analyzer factories */
+    private static final Version LUCENE_VERSION = Version.LUCENE_41;
+    /** value of the 'mode' parameter of the tokenizer factory */
+    private static final String TOKENIZER_MODE = "search"; //normal, extended
+    /** unmodifiable service properties of this engine (ordering and NLP role) */
+    private static final Map<String,Object> SERVICE_PROPERTIES;
+    /** init parameters for the tokenizer factory */
+    private static final Map<String,String> TOKENIZER_FACTORY_CONFIG = new HashMap<String,String>();
+    /** init parameters for the base form filter factory */
+    private static final Map<String, String> BASE_FORM_FILTER_CONFIG = new HashMap<String,String>();
+    /** init parameters for the part-of-speech stop filter factory */
+    private static final Map<String, String> POS_FILTER_CONFIG = new HashMap<String,String>();
+    /** init parameters for the katakana stem filter factory */
+    private static final Map<String, String> STEMM_FILTER_CONFIG = new HashMap<String,String>();
+    static {
+        //all factories are configured with the same match version
+        final String matchVersion = LUCENE_VERSION.toString();
+
+        Map<String,Object> serviceProps = new HashMap<String,Object>();
+        serviceProps.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING, 
+            ServiceProperties.ORDERING_NLP_TOKENIZING);
+        serviceProps.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE, 
+            NlpProcessingRole.Tokenizing);
+        SERVICE_PROPERTIES = Collections.unmodifiableMap(serviceProps);
+
+        //tokenizer: we want to have tokens for punctuations
+        TOKENIZER_FACTORY_CONFIG.put("luceneMatchVersion", matchVersion);
+        TOKENIZER_FACTORY_CONFIG.put("mode",TOKENIZER_MODE);
+        TOKENIZER_FACTORY_CONFIG.put("discardPunctuation", "false");
+
+        //base form filter: no parameters other than the match version
+        BASE_FORM_FILTER_CONFIG.put("luceneMatchVersion", matchVersion);
+
+        //POS stop filter: 'tags' is the name of the resource with the stop tags
+        POS_FILTER_CONFIG.put("luceneMatchVersion", matchVersion);
+        POS_FILTER_CONFIG.put("tags", "nostoptags.txt");
+        POS_FILTER_CONFIG.put("enablePositionIncrements","true");
+
+        //katakana stem filter
+        STEMM_FILTER_CONFIG.put("luceneMatchVersion", matchVersion);
+        STEMM_FILTER_CONFIG.put("minimumLength","4");
+    }
+
+
+    private static Logger log = LoggerFactory.getLogger(KuromojiNlpEngine.class);
+    
+    @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
+    protected ResourceLoader parentResourceLoader;
+
+    protected ResourceLoader resourceLoader;
+
+    //private MappingCharFilterFactory charFilterFactory;
+    private TokenizerFactory tokenizerFactory;
+    
+    private List<TokenFilterFactory> filterFactories = new ArrayList<TokenFilterFactory>();
+    
+    @Reference
+    protected AnalysedTextFactory analysedTextFactory;
+    
+    protected LiteralFactory lf = LiteralFactory.getInstance();
+    /**
+
+     * holds {@link PosTag}s that are not contained in the 
+     * {@link org.apache.stanbol.enhancer.engines.kuromoji.Constants#POS_TAG_SET}
+     */
+    private Map<String,PosTag> adhocTags = new HashMap<String,PosTag>();
+    
+    /**
+     * Checks if this engine is able to process the parsed {@link ContentItem}.
+     * <p/>
+     * Returns ENHANCE_ASYNC if a plain text content part is available and the
+     * language of the content item is 'ja' or starts with 'ja-'. In all other
+     * cases CANNOT_ENHANCE is returned.
+     *
+     * @param ci the content item to check
+     * @return ENHANCE_ASYNC or CANNOT_ENHANCE
+     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+     *          if the introspecting process of the content item
+     *          fails
+     */
+    @Override
+    public int canEnhance(ContentItem ci) throws EngineException {
+        //(1) a plain text content part is required
+        final Map.Entry<UriRef,Blob> plainText = NlpEngineHelper.getPlainText(this, ci, false);
+        final boolean hasText = plainText != null && plainText.getValue() != null;
+        if(!hasText) {
+            return CANNOT_ENHANCE;
+        }
+        //(2) only Japanese texts are supported
+        final String lang = getLanguage(this,ci,false);
+        final boolean japanese = lang != null && (lang.equals("ja") || lang.startsWith("ja-"));
+        if(!japanese) {
+            return CANNOT_ENHANCE;
+        }
+        log.trace(" > can enhance ContentItem {} with language {}",ci,lang);
+        return ENHANCE_ASYNC;
+    }
+
+    /**
+     * Compute enhancements for supplied ContentItem. The results of the process
+     * are expected to be stored in the metadata of the content item.
+     * <p/>
+     * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
+     * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
+     * <p/>
+     * This method creates a new POSContentPart using {@link org.apache.stanbol.enhancer.engines.pos.api.POSTaggerHelper#createContentPart} from a text/plain part and
+     * stores it as a new part in the content item. The metadata is not changed.
+     *
+     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+     *          if the underlying process failed to work as
+     *          expected
+     */
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        final AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
+
+        String language = getLanguage(this,ci,false);
+        if(!("ja".equals(language) || (language != null && language.startsWith("ja-")))) {
+            throw new IllegalStateException("The detected language is NOT 'ja'! "
+                + "As this is also checked within the #canEnhance(..) method this "
+                + "indicates an Bug in the used EnhancementJobManager implementation. "
+                + "Please report this on the dev@apache.stanbol.org or create an "
+                + "JIRA issue about this.");
+        }
+        //start with the Tokenizer
+        TokenStream tokenStream = tokenizerFactory.create(new CharSequenceReader(at.getText()));
+        //build the analyzing chain by adding all TokenFilters
+        for(TokenFilterFactory filterFactory : filterFactories){
+            tokenStream = filterFactory.create(tokenStream);
+        }
+
+        //Try to extract sentences based on POS tags ...
+        int sentStartOffset = -1;
+        //NER data
+        List<NerData> nerList = new ArrayList<NerData>();
+        int nerSentIndex = 0; //the next index where the NerData.context need to be set
+        NerData ner = null;
+        OffsetAttribute offset = null;
+        try {
+        	tokenStream.reset(); //required with Solr 4
+            while (tokenStream.incrementToken()){
+                offset = tokenStream.addAttribute(OffsetAttribute.class);
+                Token token = at.addToken(offset.startOffset(), offset.endOffset());
+                //Get the POS attribute and init the PosTag
+                PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
+                PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
+                if(posTag == null){
+                    posTag = adhocTags.get(posAttr.getPartOfSpeech());
+                    if(posTag == null){
+                        posTag = new PosTag(posAttr.getPartOfSpeech());
+                        adhocTags.put(posAttr.getPartOfSpeech(), posTag);
+                        log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech());
+                    }
+                }
+                //Sentence detection by POS tag
+                if(sentStartOffset < 0){ //the last token was a sentence ending
+                	sentStartOffset = offset.startOffset();
+                }
+                if(posTag.hasPos(Pos.Point)) { 
+                    Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
+                    //add the sentence as context to the NerData instances
+                    while(nerSentIndex < nerList.size()){
+                        nerList.get(nerSentIndex).context = sent.getSpan();
+                        nerSentIndex++;
+                    }
+                    sentStartOffset = -1;
+                }
+                //POS
+                token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
+                //NER
+                NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
+                if(ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))){
+                    //write NER annotation
+                    Chunk chunk = at.addChunk(ner.start, ner.end);
+                    chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
+                    //NOTE that the fise:TextAnnotation are written later based on the nerList
+                    //clean up
+                    ner = null;
+                }
+                if(nerTag != null){
+                    if(ner == null){
+                        ner = new NerData(nerTag, offset.startOffset());
+                        nerList.add(ner);
+                    }
+                    ner.end = offset.endOffset();
+                }
+                BaseFormAttribute baseFormAttr = tokenStream.addAttribute(BaseFormAttribute.class);
+                MorphoFeatures morpho = null;
+                if(baseFormAttr != null && baseFormAttr.getBaseForm() != null){
+                	morpho = new MorphoFeatures(baseFormAttr.getBaseForm());
+                	morpho.addPos(posTag); //and add the posTag
+                }
+                InflectionAttribute inflectionAttr = tokenStream.addAttribute(InflectionAttribute.class);
+                inflectionAttr.getInflectionForm();
+                inflectionAttr.getInflectionType();
+                if(morpho != null){ //if present add the morpho
+                	token.addAnnotation(MORPHO_ANNOTATION, Value.value(morpho));
+                }
+            }
+            //we still need to write the last sentence
+            Sentence lastSent = null;
+            if(offset != null && sentStartOffset >= 0 && offset.endOffset() > sentStartOffset){
+                lastSent = at.addSentence(sentStartOffset, offset.endOffset());
+            }
+            //and set the context off remaining named entities
+            while(nerSentIndex < nerList.size()){
+                if(lastSent != null){
+                    nerList.get(nerSentIndex).context = lastSent.getSpan();
+                } else { //no sentence detected
+                    nerList.get(nerSentIndex).context = at.getSpan();
+                }
+                nerSentIndex++;
+            }
+        } catch (IOException e) {
+            throw new EngineException(this, ci, "Exception while reading from "
+                + "AnalyzedText contentpart",e);
+        } finally {
+            try {
+                tokenStream.close();
+            } catch (IOException e) {/* ignore */}
+        }
+        //finally write the NER annotations to the metadata of the ContentItem
+        final MGraph metadata = ci.getMetadata();
+        ci.getLock().writeLock().lock();
+        try {
+            Language lang = new Language("ja");
+            for(NerData nerData : nerList){
+                UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
+                metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(
+                    at.getSpan().substring(nerData.start, nerData.end),lang)));
+                metadata.add(new TripleImpl(ta, DC_TYPE, nerData.tag.getType()));
+                metadata.add(new TripleImpl(ta, ENHANCER_START, lf.createTypedLiteral(nerData.start)));
+                metadata.add(new TripleImpl(ta, ENHANCER_END, lf.createTypedLiteral(nerData.end)));
+                metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, 
+                    new PlainLiteralImpl(nerData.context, lang)));
+            }
+        } finally{
+            ci.getLock().writeLock().unlock();
+        }
+    }
+
+    /**
+     * Getter for the service properties of this engine.
+     * @return the unmodifiable map created in the static initialiser
+     */
+    @Override
+    public Map<String,Object> getServiceProperties() {
+        return SERVICE_PROPERTIES;
+    }
+    /**
+     * Activates the engine: calls the activate method of the super class and
+     * initialises the Kuromoji analyzer chain. The chain consists of the
+     * {@link JapaneseTokenizerFactory} followed by the base form, the
+     * part-of-speech stop and the katakana stem filter factories (in that
+     * order).
+     *
+     * @param ce the {@link org.osgi.service.component.ComponentContext}
+     * @throws ConfigurationException if the activation of the super class fails
+     * @throws IOException if the initialisation of the analyzer factories fails
+     */
+    @Activate
+    protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
+        log.info("activating kuromoji tokenizing engine");
+        super.activate(ce);
+        //start with an empty list, so that activating the same instance again
+        //neither fails (if the field is null) nor adds the filters a second time
+        filterFactories = new ArrayList<TokenFilterFactory>();
+        //init the Solr ResourceLoader used for initialising the components
+        resourceLoader = new StanbolResourceLoader(parentResourceLoader);
+        tokenizerFactory = new JapaneseTokenizerFactory();
+        tokenizerFactory.init(TOKENIZER_FACTORY_CONFIG);
+        tokenizerFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader);
+        //base form filter
+        TokenFilterFactory baseFormFilterFactory =  new JapaneseBaseFormFilterFactory();
+        baseFormFilterFactory.init(BASE_FORM_FILTER_CONFIG);
+        baseFormFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        filterFactories.add(baseFormFilterFactory);
+        //POS filter
+        TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory();
+        posFilterFactory.init(POS_FILTER_CONFIG);
+        posFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader);
+        filterFactories.add(posFilterFactory);
+        //Stemming
+        TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory();
+        stemmFilterFactory.init(STEMM_FILTER_CONFIG);
+        stemmFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        filterFactories.add(stemmFilterFactory);
+    }
+    
+    /**
+     * Deactivates the engine: releases the analyzer factories and calls the
+     * deactivate method of the super class.
+     *
+     * @param context the {@link org.osgi.service.component.ComponentContext}
+     */
+    @Deactivate
+    protected void deactivate(ComponentContext context) {
+        tokenizerFactory = null;
+        //replace the list by an empty one instead of setting the field to
+        //null, so that neither a repeated deactivate(..) nor a later
+        //activate(..) on the same instance causes a NullPointerException
+        filterFactories = new ArrayList<TokenFilterFactory>();
+        super.deactivate(context);
+    }
+
+    /**
+     * Internal helper {@link Tokenizer} that returns the {@link Sentence}s
+     * already present in an {@link AnalysedText} as tokens of the type
+     * "sentence", so that sentences need not to be detected a second time.
+     * @author Rupert Westenthaler
+     *
+     */
+    protected final class AnalyzedTextSentenceTokenizer extends Tokenizer {
+        /** the analysed text providing the sentences */
+        private final AnalysedText analysedText;
+        private final CharTermAttribute term = addAttribute(CharTermAttribute.class);
+        private final OffsetAttribute offsets = addAttribute(OffsetAttribute.class);
+        private final TypeAttribute type = addAttribute(TypeAttribute.class);
+        /** iterator over the sentences; re-created by {@link #reset()} */
+        private Iterator<Sentence> sentenceIt;
+        /** the sentence returned by the last call to incrementToken() */
+        private Sentence current = null;
+
+        protected AnalyzedTextSentenceTokenizer(AnalysedText at) {
+            super(new StringReader(at.getText().toString()));
+            this.analysedText = at;
+            this.sentenceIt = at.getSentences();
+        }
+
+        /**
+         * Emits the next sentence as token
+         * @return <code>false</code> if there are no more sentences
+         */
+        @Override
+        public boolean incrementToken() throws IOException {
+            if(!sentenceIt.hasNext()){
+                return false;
+            }
+            current = sentenceIt.next();
+            term.setEmpty().append(current.getSpan());
+            offsets.setOffset(current.getStart(),current.getEnd());
+            type.setType("sentence");
+            return true;
+        }
+
+        @Override
+        public void end() throws IOException {
+            // the final offset is the end of the analysed text
+            final int textEnd = analysedText.getEnd();
+            offsets.setOffset(textEnd, textEnd);
+        }
+
+        @Override
+        public void reset() throws IOException {
+            super.reset();
+            // restart the iteration and clear the state of all attributes
+            sentenceIt = analysedText.getSentences();
+            term.setEmpty();
+            offsets.setOffset(0, 0);
+            type.setType(null);
+        }
+    }
+}

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java Mon Mar 11 13:18:59 2013
@@ -0,0 +1,25 @@
+package org.apache.stanbol.enhancer.engines.kuromoji.impl;
+
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+
+/**
+ * Mutable holder for a named entity detected while processing the tokens of
+ * a text. Instances are collected first and written to the metadata in a
+ * single step, so that the write lock on the {@link ContentItem} needs not
+ * to be obtained for each detected entity.
+ * @author Rupert Westenthaler
+ *
+ */
+class NerData {
+
+    /** the tag of the named entity */
+    protected final NerTag tag;
+    /** the start offset of the named entity */
+    protected final int start;
+    /** the end offset of the named entity; updated while tokens are added */
+    protected int end;
+    /** the text used as selection context of the named entity */
+    protected String context;
+
+    /**
+     * Creates the data for a named entity. End and context are set later.
+     * @param tag the tag of the named entity
+     * @param start the start offset
+     */
+    protected NerData(NerTag tag, int start){
+        this.start = start;
+        this.tag = tag;
+    }
+
+}
\ No newline at end of file

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Mar 11 13:18:59 2013
@@ -0,0 +1,31 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+#
+
+# Label and description of the KuromojiNlpEngine component. The keys need to
+# start with the name of the component (the fully qualified class name).
+org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine.name=Apache \
+Stanbol Enhancer Engine: Kuromoji NLP
+org.apache.stanbol.enhancer.engines.kuromoji.impl.KuromojiNlpEngine.description=Enhancement \
+Engine that detects sentences, tokenizes, POS tags and extracts named entities \
+from Japanese text by using the Lucene Kuromoji analyzers.
+
+
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
+used in the RESTful interface '/engine/<name>'
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are active the \
+one with the higher ranking will be used to process parsed content items.

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/nostoptags.txt
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/nostoptags.txt?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/nostoptags.txt (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/main/resources/nostoptags.txt Mon Mar 11 13:18:59 2013
@@ -0,0 +1 @@
+# this file is loaded by the POS Filter Factory
\ No newline at end of file

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/ClasspathDataFileProvider.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/ClasspathDataFileProvider.java?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/ClasspathDataFileProvider.java (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/ClasspathDataFileProvider.java Mon Mar 11 13:18:59 2013
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.kuromoji.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Map;
+
+import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link DataFileProvider} for tests that serves data files from class path
+ * resources located below {@link #RESOURCE_BASE_PATH}.
+ */
+public class ClasspathDataFileProvider implements DataFileProvider {
+
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    /*
+     * NOTE: This path needs to be the same as path configured for the 
+     *   'org.apache.stanbol:org.apache.stanbol.commons.solr.extras.gosen'
+     *   bundle
+     */
+    public static final String RESOURCE_BASE_PATH = "datafiles/";
+    
+    /** requests for other (non null) symbolic names are ignored */
+    private final String symbolicName;
+    
+    /**
+     * @param bundleSymbolicName the symbolic name this provider answers
+     * requests for
+     */
+    ClasspathDataFileProvider(String bundleSymbolicName) {
+        this.symbolicName = bundleSymbolicName;
+    }
+    
+    /**
+     * Opens the stream for the requested data file.
+     * @return the stream or <code>null</code> if this provider does not have
+     * the file (another provider might supply it)
+     */
+    @Override
+    public InputStream getInputStream(String bundleSymbolicName,
+            String filename, Map<String, String> comments) 
+    throws IOException {
+        final URL resource = getDataFile(bundleSymbolicName, filename);
+        if(resource == null) {
+            // fine - if we don't have the data file, another provider
+            // might supply it
+            return null;
+        }
+        return resource.openStream();
+    }
+
+    /**
+     * @return <code>true</code> if the requested data file can be found
+     */
+    @Override
+    public boolean isAvailable(String bundleSymbolicName, String filename, Map<String,String> comments) {
+        final URL resource = getDataFile(bundleSymbolicName, filename);
+        return resource != null;
+    }
+
+    /**
+     * Looks up the class path resource for the requested data file.
+     * @param bundleSymbolicName the requested symbolic name or <code>null</code>
+     * @param filename the name of the file relative to {@link #RESOURCE_BASE_PATH}
+     * @return the URL of the resource or <code>null</code> if the symbolic
+     * name does not match or the resource is not found
+     */
+    private URL getDataFile(String bundleSymbolicName, String filename) {
+        //a null symbolic name matches; any other value needs to be equal to
+        //the symbolic name used to create this provider
+        final boolean otherBundle = bundleSymbolicName != null 
+                && !symbolicName.equals(bundleSymbolicName);
+        if(otherBundle) {
+            log.debug("Requested bundleSymbolicName {} does not match mine ({}), request ignored",
+                    bundleSymbolicName, symbolicName);
+            return null;
+        }
+        //resolve the file via the class loader of this class
+        return getClass().getClassLoader().getResource(RESOURCE_BASE_PATH + filename);
+    }
+}

Added: stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/MockComponentContext.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/MockComponentContext.java?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/MockComponentContext.java (added)
+++ stanbol/branches/stanbol-solr4/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/MockComponentContext.java Mon Mar 11 13:18:59 2013
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.kuromoji.impl;
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.Dictionary;
+import java.util.Hashtable;
+
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.BundleException;
+import org.osgi.framework.BundleListener;
+import org.osgi.framework.Filter;
+import org.osgi.framework.FrameworkListener;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.framework.ServiceListener;
+import org.osgi.framework.ServiceReference;
+import org.osgi.framework.ServiceRegistration;
+import org.osgi.service.component.ComponentContext;
+import org.osgi.service.component.ComponentInstance;
+
+/**
+ * Mock {@link ComponentContext} used by the tests of this module.
+ * <p>
+ * Only {@link #getProperties()} and {@link #getBundleContext()} are
+ * functional. All other methods throw an
+ * {@link UnsupportedOperationException}. The {@link BundleContext} handed out
+ * by this mock only supports {@link BundleContext#getProperty(String)}, which
+ * is answered from the JVM system properties.
+ */
+public class MockComponentContext implements ComponentContext {
+
+    /** Message of the exception thrown by every unsupported method. */
+    private static final String UNSUPPORTED = "Mock implementation";
+
+    /** The component properties returned by {@link #getProperties()}. */
+    protected final Dictionary<String, Object> properties;
+    /** The bundle context returned by {@link #getBundleContext()}. */
+    protected final BundleContext bundleContext = new MockBundleContext();
+
+    /**
+     * Creates a context backed by a new, empty {@link Hashtable}.
+     */
+    public MockComponentContext() {
+        this(new Hashtable<String, Object>());
+    }
+
+    /**
+     * Creates a context backed by the given properties. The dictionary is
+     * stored as is (no copy is made).
+     *
+     * @param properties the component properties
+     */
+    public MockComponentContext(Dictionary<String, Object> properties) {
+        this.properties = properties;
+    }
+
+    @Override
+    public void disableComponent(String name) {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    @Override
+    public void enableComponent(String name) {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    /**
+     * @return the mock bundle context owned by this instance
+     */
+    @Override
+    public BundleContext getBundleContext() {
+        return bundleContext;
+    }
+
+    @Override
+    public ComponentInstance getComponentInstance() {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    /**
+     * @return the dictionary this context was created with
+     */
+    @Override
+    public Dictionary<String, Object> getProperties() {
+        return properties;
+    }
+
+    @Override
+    public ServiceReference getServiceReference() {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    @Override
+    public Bundle getUsingBundle() {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    @Override
+    public Object locateService(String name) {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    @Override
+    public Object locateService(String name, ServiceReference reference) {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    @Override
+    public Object[] locateServices(String name) {
+        throw new UnsupportedOperationException(UNSUPPORTED);
+    }
+
+    /**
+     * {@link BundleContext} that only supports {@link #getProperty(String)};
+     * every other method throws an {@link UnsupportedOperationException}.
+     */
+    private static final class MockBundleContext implements BundleContext {
+        /**
+         * Used by the Engine to read System properties
+         */
+        @Override
+        public String getProperty(String key) {
+            return System.getProperty(key);
+        }
+
+        @Override
+        public Bundle getBundle() {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Bundle installBundle(String location) throws BundleException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Bundle installBundle(String location, InputStream input) throws BundleException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Bundle getBundle(long id) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Bundle[] getBundles() {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void addServiceListener(ServiceListener listener, String filter) throws InvalidSyntaxException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void addServiceListener(ServiceListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void removeServiceListener(ServiceListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void addBundleListener(BundleListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void removeBundleListener(BundleListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void addFrameworkListener(FrameworkListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public void removeFrameworkListener(FrameworkListener listener) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        // The raw Dictionary parameter is kept as in the original signature.
+        @Override
+        public ServiceRegistration registerService(String[] clazzes, Object service, Dictionary properties) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public ServiceRegistration registerService(String clazz, Object service, Dictionary properties) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public ServiceReference[] getServiceReferences(String clazz, String filter) throws InvalidSyntaxException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public ServiceReference[] getAllServiceReferences(String clazz, String filter) throws InvalidSyntaxException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public ServiceReference getServiceReference(String clazz) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Object getService(ServiceReference reference) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public boolean ungetService(ServiceReference reference) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public File getDataFile(String filename) {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+        @Override
+        public Filter createFilter(String filter) throws InvalidSyntaxException {
+            throw new UnsupportedOperationException(UNSUPPORTED);
+        }
+
+    }
+}