You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/11 14:19:02 UTC

svn commit: r1455131 [7/7] - in /stanbol/branches/stanbol-solr4: commons/ commons/frameworkfragment/ commons/solr/core/ commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/ commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/uti...

Modified: stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/spellings.txt
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/spellings.txt?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/spellings.txt (original)
+++ stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/spellings.txt Mon Mar 11 13:18:59 2013
@@ -1,2 +1,2 @@
-pizza
-history
\ No newline at end of file
+pizza
+history

Modified: stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords.txt?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords.txt (original)
+++ stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords.txt Mon Mar 11 13:18:59 2013
@@ -1,53 +1,14 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-s
-such
-t
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

Added: stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords_en.txt
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords_en.txt?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords_en.txt (added)
+++ stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/stopwords_en.txt Mon Mar 11 13:18:59 2013
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with

Modified: stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/synonyms.txt?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/synonyms.txt (original)
+++ stanbol/branches/stanbol-solr4/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/synonyms.txt Mon Mar 11 13:18:59 2013
@@ -1,22 +1,29 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+

Modified: stanbol/branches/stanbol-solr4/entityhub/ldpath/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/ldpath/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/ldpath/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/entityhub/ldpath/pom.xml Mon Mar 11 13:18:59 2013
@@ -130,7 +130,7 @@
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.yard.solr</artifactId>
-      <version>0.11.0</version>
+      <version>0.12.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
     <dependency>

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/yard/solr/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/entityhub/yard/solr/pom.xml Mon Mar 11 13:18:59 2013
@@ -105,12 +105,12 @@
     <dependency> <!-- provides Solr and OSGI utilities for Solr -->
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.solr.core</artifactId>
-      <version>0.11.0</version>
+      <version>0.12.0-SNAPSHOT</version>
     </dependency>
     <dependency> <!-- provides managed Solr servers -->
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.solr.managed</artifactId>
-      <version>0.11.0</version>
+      <version>0.12.0-SNAPSHOT</version>
     </dependency>
 
     <!-- Stanbol Entityhub internal dependencies -->

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java (original)
+++ stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java Mon Mar 11 13:18:59 2013
@@ -515,15 +515,15 @@ public class SolrYard extends AbstractYa
             server = _registeredServerTracker.getService();
             //TODO: remove and replace with a setting where the SolrYard does not
             //      activate until the SolrServer is available.
-            if(server == null){
-                for(int i = 0;i<5;i++){//waiting for a maximum of 5sec 
-                    try {
-                        log.info(" ... waiting 1sec for SolrServer");
-                        
-                        server = (SolrServer)_registeredServerTracker.waitForService(1000);
-                    } catch (InterruptedException e) {}
-                }
-            }
+//            if(server == null){
+//                for(int i = 0;i<5;i++){//waiting for a maximum of 5sec 
+//                    try {
+//                        log.info(" ... waiting 1sec for SolrServer");
+//                        
+//                        server = (SolrServer)_registeredServerTracker.waitForService(1000);
+//                    } catch (InterruptedException e) {}
+//                }
+//            }
             if(server != null && !server.equals(this._server)){
                 //reset the fieldMapper so that it is reinitialised for the new one
                 //STANBOL-519

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java (original)
+++ stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java Mon Mar 11 13:18:59 2013
@@ -27,9 +27,12 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerConfig;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerFactory;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.solr.analysis.ICUTokenizerFactory;
-import org.apache.solr.analysis.TokenizerFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.stanbol.commons.solr.utils.SolrUtil;
 import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
@@ -38,18 +41,14 @@ import org.apache.stanbol.entityhub.yard
 public final class QueryUtils {
     private QueryUtils() {}
     /**
-     * The {@link TokenizerFactory} used to create Tokens for parsed 
-     * {@link IndexValue#getValue()} in case <code>false</code> is parsed for
-     * the tokenize property of {@link #encodeQueryValue(IndexValue, boolean)}.
-     * <p>
-     * Currently the {@link ICUTokenizerFactory} is used for Tokenizing.
+     * The {@link DefaultICUTokenizerConfig}
      */
-    private final static TokenizerFactory tokenizerFactory = new ICUTokenizerFactory();
+    private final static ICUTokenizerConfig tokenizerConfig = new DefaultICUTokenizerConfig();
     /**
      * Regex pattern that searches for Wildcard chars '*' and '?' excluding
      * escaped versions '\*' and '\?'
      */
-    private final static Pattern wILDCARD_QUERY_CHAR_PATTERN = Pattern.compile("[^\\\\][\\*\\?]");
+    private final static Pattern WILDCARD_QUERY_CHAR_PATTERN = Pattern.compile("[^\\\\][\\*\\?]");
     
     /**
      * This method encodes a parsed index value as needed for queries.
@@ -76,7 +75,7 @@ public final class QueryUtils {
      * instead
      * </ul>
      * 
-     * @param value
+     * @param indexValue
      *            the index value
      * @param escape if <code>true</code> all Solr special chars are escaped if
      * <code>false</code> than '*' and '?' as used for wildcard searches are
@@ -182,8 +181,8 @@ public final class QueryUtils {
     private static String[] parseWildcardQueryTerms(String value,boolean loewercaseWildcardTokens) {
         //This assumes that the Tokenizer does tokenize '*' and '?',
         //what makes it a little bit tricky. 
-        Tokenizer tokenizer = tokenizerFactory.create(new StringReader(value));
-        Matcher m = wILDCARD_QUERY_CHAR_PATTERN.matcher(value);
+        Tokenizer tokenizer = new ICUTokenizer(new StringReader(value),tokenizerConfig);
+        Matcher m = WILDCARD_QUERY_CHAR_PATTERN.matcher(value);
         int next = m.find()?m.start()+1:-1;
         if(next < 0){ //No wildcard
             return new String[]{'"'+value+'"'};
@@ -194,6 +193,7 @@ public final class QueryUtils {
         boolean foundWildcard = false;
         //Lucene tokenizer are really low level ...
         try {
+        	tokenizer.reset(); //starting with Solr4 reset MUST BE called before using
             while(tokenizer.incrementToken()){
                 //only interested in the start/end indexes of tokens
                 OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/default.solrindex.zip
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/default.solrindex.zip?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
Binary files - no diff available.

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/entityhub.solrindex.zip
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/entityhub.solrindex.zip?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
Binary files - no diff available.

Added: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip?rev=1455131&view=auto
==============================================================================
Binary file - no diff available.

Propchange: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/paoding.solrindex.outdated
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/paoding.solrindex.outdated?rev=1455131&view=auto
==============================================================================
Binary file - no diff available.

Propchange: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/paoding.solrindex.outdated
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/smartcn.solrindex.zip
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/entityhub/yard/solr/src/main/resources/solr/core/smartcn.solrindex.zip?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
Binary files - no diff available.

Modified: stanbol/branches/stanbol-solr4/integration-tests/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/integration-tests/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/integration-tests/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/integration-tests/pom.xml Mon Mar 11 13:18:59 2013
@@ -149,7 +149,7 @@
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.test</artifactId>
-      <version>0.11.0</version>
+      <version>0.12.0-SNAPSHOT</version>
     </dependency>
     <dependency>
       <groupId>org.apache.stanbol</groupId>
@@ -235,6 +235,17 @@
       <groupId>org.apache.clerezza</groupId>
       <artifactId>rdf.rdfjson</artifactId>
     </dependency>
+    <!-- use log4j for logging -->
+    <dependency>  <!-- used for debug level logging during tests -->
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
 </project>

Modified: stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerTestBase.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerTestBase.java?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerTestBase.java (original)
+++ stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerTestBase.java Mon Mar 11 13:18:59 2013
@@ -187,6 +187,13 @@ public class EnhancerTestBase extends St
                 .assertContentRegexp(
                     "http:\\\\/\\\\/.*\\\\/entityhub\\\\/site\\\\/dbpedia\\\\/"
                 );
+                //also assert that the SolrYard for the dbpedia site is fully
+                //initialized
+                executor.execute(
+                        builder.buildGetRequest("/entityhub/site/dbpedia" +
+                        		"/entity?id=urn:does:not:exist:f82js95xsig39s.23987")
+                        .withHeader("Accept", "application/json"))
+                .assertStatus(404);
                 log.info("Enhancement engines checked for '{}', all present", endpoint);
                 return true;
             }

Modified: stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/EntityhubTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/EntityhubTest.java?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/EntityhubTest.java (original)
+++ stanbol/branches/stanbol-solr4/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/EntityhubTest.java Mon Mar 11 13:18:59 2013
@@ -117,6 +117,8 @@ public final class EntityhubTest extends
         testEntityUpdated();
         testEntityDelete();
         testEntityDeleted();
+        testEntityDeleteAll();
+        testAllEntitiesDeleted();
     }
     private void testEntityCreation() throws IOException {
         InputStream in = EntityhubTest.class.getClassLoader().getResourceAsStream("doap_Stanbol.rdf");
@@ -198,6 +200,19 @@ public final class EntityhubTest extends
             .withHeader("Accept", "application/json"));
         re.assertStatus(404);
     }
+    private void testEntityDeleteAll() throws IOException {
+        Request request = builder.buildOtherRequest(new HttpDelete(
+            builder.buildUrl("/entityhub/entity", "id", "*")));
+        RequestExecutor re = executor.execute(request);
+        re.assertStatus(200);
+    }
+    private void testAllEntitiesDeleted() throws IOException {
+        String id = "http://xml.apache.org/xerces-c/";
+        RequestExecutor re = executor.execute(
+            builder.buildGetRequest("/entityhub/entity","id",id)
+            .withHeader("Accept", "application/json"));
+        re.assertStatus(404);
+    }
     @Test
     public void testEntityLookup() throws IOException, JSONException {
         String uri = "http://dbpedia.org/resource/Paris";
@@ -285,30 +300,36 @@ public final class EntityhubTest extends
     }
 
     private void testFindLimitAndOffsetQuery() throws IOException, JSONException {
-        FindQueryTestCase test = new FindQueryTestCase("XML*",
+    	//With Solr4 we need a test that produces different scores for results,
+    	//to ensure consistent ordering
+        FindQueryTestCase test = new FindQueryTestCase("XML XSL*",
             Arrays.asList(
-                "http://xerces.apache.org/xml-commons/components/external/",
-                "http://xml.apache.org/xerces-c/",
-                "http://xerces.apache.org/xerces2-j/",
-                "http://xerces.apache.org/xerces-p",
-                "http://xerces.apache.org/xml-commons/components/resolver/"),
+                    "http://velocity.apache.org/anakia/",
+                    "http://xalan.apache.org/xalan-c/",
+                    "http://xalan.apache.org/xalan-j/",
+                    "http://velocity.apache.org/dvsl/devel/",
+                    "http://xmlgraphics.apache.org/commons/",
+                    "http://xmlgraphics.apache.org/fop"),
             null);
-        test.setField("http://usefulinc.com/ns/doap#name");
+        test.setField("http://usefulinc.com/ns/doap#description");
+        test.setLimit(10);
         test.setLanguage(null);
         executeQuery(test);
         //repeat the test with offset 2 and limit 2 to only retrieve the 3-4 result
-        test = new FindQueryTestCase("XML*",
+        test = new FindQueryTestCase("XML XSL*",
             Arrays.asList(
-                "http://xerces.apache.org/xml-commons/components/external/",
-                "http://xerces.apache.org/xerces-p"),
+                    "http://xalan.apache.org/xalan-j/",
+                    "http://velocity.apache.org/dvsl/devel/"),
             Arrays.asList(
-                "http://xml.apache.org/xerces-c/",
-                "http://xerces.apache.org/xerces2-j/",
-                "http://xerces.apache.org/xml-commons/components/resolver/"));
-        test.setField("http://usefulinc.com/ns/doap#name");
+                    "http://velocity.apache.org/anakia/",
+                    "http://xalan.apache.org/xalan-c/",
+                    "http://xmlgraphics.apache.org/commons/",
+                    "http://xmlgraphics.apache.org/fop"));
+        test.setField("http://usefulinc.com/ns/doap#description");
         test.setOffset(2);
         test.setLimit(2);
         test.setLanguage(null);
+        executeQuery(test);
         
     }
 

Added: stanbol/branches/stanbol-solr4/integration-tests/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/integration-tests/src/test/resources/log4j.properties?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/integration-tests/src/test/resources/log4j.properties (added)
+++ stanbol/branches/stanbol-solr4/integration-tests/src/test/resources/log4j.properties Mon Mar 11 13:18:59 2013
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Root logger option
+log4j.rootLogger=INFO, stdout
+ 
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
+# log4j.logger.org.apache.stanbol=DEBUG
\ No newline at end of file

Propchange: stanbol/branches/stanbol-solr4/integration-tests/src/test/resources/log4j.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Propchange: stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Mar 11 13:18:59 2013
@@ -0,0 +1,5 @@
+.project
+
+.settings
+
+target

Added: stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/README.md
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/README.md?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/README.md (added)
+++ stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/README.md Mon Mar 11 13:18:59 2013
@@ -0,0 +1,77 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+Japanese language support based on Lucene Kuromoji Analyzer
+==============
+
+This BundleList includes three modules that bring Japanese language support to Apache Stanbol.
+
+See the comments in [list.xml](src/main/bundles/list.xml) for more details.
+
+Solr Field Configuration
+---
+
+When you plan to use this Analyzer to process Japanese texts it is important to also properly configure the Solr schema.xml used by the Entityhub SolrYard.
+
+For that you will need to add two things:
+
+1. A fieldType specification for Japanese
+
+    :::xml
+    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
+      <analyzer>
+        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
+        <filter class="solr.JapaneseBaseFormFilterFactory"/>
+        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
+        <filter class="solr.CJKWidthFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
+        <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+2. A dynamic field using this field type that matches against Japanese language literals
+
+    :::xml
+    <!--
+     Dynamic field for the Japanese language.
+     -->
+    <dynamicField name="@ja*" type="text_ja" indexed="true" stored="true" multiValued="true" omitNorms="false"/>
+
+The [kuromoji.solrindex.zip](https://svn.apache.org/repos/asf/stanbol/trunk/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip) is identical with the default configuration but uses the above fieldType and dynamicField specification.
+
+### Usage with the EntityhubIndexing Tool
+
+1. Extract the [kuromoji.solrindex.zip](https://svn.apache.org/repos/asf/stanbol/trunk/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip) to the "indexing/config" directory 
+2. Rename the "indexing/config/kuromoji" directory to the {site-name} (the value of the "name" property of the "indexing/config/indexing.properties" file).
+
+As an alternative to (2) you can also explicitly configure the name of the Solr configuration as the value of the "solrConf" parameter (here "solrConf:kuromoji") of the SolrYardIndexingDestination.
+
+    :::text
+    indexingDestination=org.apache.stanbol.entityhub.indexing.destination.solryard.SolrYardIndexingDestination,solrConf:kuromoji,boosts:fieldboosts
+
+### Usage with the Entityhub SolrYard
+
+If you want to create an empty SolrYard instance using the [kuromoji.solrindex.zip](https://svn.apache.org/repos/asf/stanbol/trunk/entityhub/yard/solr/src/main/resources/solr/core/kuromoji.solrindex.zip) configuration you will need to
+
+1. copy the kuromoji.solrindex.zip to the datafile directory of your Stanbol instance ({working-dir}/stanbol/datafiles)
+2. rename it to the {name} of the SolrYard you want to create. The file name needs to be {name}.solrindex.zip
+3. create the SolrYard instance and configure the "Solr Index/Core" (org.apache.stanbol.entityhub.yard.solr.solrUri) to {name}. Make sure the "Use default SolrCore configuration" (org.apache.stanbol.entityhub.yard.solr.useDefaultConfig) is disabled.
+
+If you want to use the kuromoji.solrindex.zip as default you can rename the file in the datafiles folder to "default.solrindex.zip" and then enable the "Use default SolrCore configuration" (org.apache.stanbol.entityhub.yard.solr.useDefaultConfig) when you configure a SolrYard instance.
+
+See also the documentation on how to [configure a managed site](http://stanbol.apache.org/docs/trunk/components/entityhub/managedsite#configuration-of-managedsites).

Added: stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/pom.xml?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/pom.xml (added)
+++ stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/pom.xml Mon Mar 11 13:18:59 2013
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+  license agreements. See the NOTICE file distributed with this work for additional
+  information regarding copyright ownership. The ASF licenses this file to
+  You under the Apache License, Version 2.0 (the "License"); you may not use
+  this file except in compliance with the License. You may obtain a copy of
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+  by applicable law or agreed to in writing, software distributed under the
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+  OF ANY KIND, either express or implied. See the License for the specific
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <prerequisites> <!-- The maven-launchpad-plugin requires Maven 3 -->
+    <maven>3.0.3</maven>
+  </prerequisites>
+
+  <parent> <!-- resolved from launchers/bundlelists via the relative path below -->
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>apache-stanbol-bundlelists</artifactId>
+    <version>0.10.0-SNAPSHOT</version>
+    <relativePath>../..</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.launchers.bundlelists.languageextras.kuromoji</artifactId>
+  <version>0.10.0-SNAPSHOT</version>
+  <packaging>partialbundlelist</packaging>
+
+  <name>Apache Stanbol Bundlelist for Language Support: Kuromoji Japanese</name>
+  <description>
+    Provides modules that bring language support for Japanese using
+    the Solr/Lucene kuromoji analyzer. This includes (1) a Bundle providing
+    the Solr Analyzer; (2) an NLP processing Engine that tokenizes, detects
+    sentences, POS tags, extracts Named Entities and lemmatizes Japanese text;
+    and (3) a LabelTokenizer needed to match tokens of the analyzed text with
+    the labels of Entities in the matched vocabularies.
+  </description>
+
+  <build>
+    <plugins>
+      <plugin> <!-- no version given here; presumably managed by the parent POM (confirm) -->
+        <groupId>org.apache.sling</groupId>
+        <artifactId>maven-launchpad-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+</project>

Propchange: stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/pom.xml
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/src/main/bundles/list.xml?rev=1455131&view=auto
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/src/main/bundles/list.xml (added)
+++ stanbol/branches/stanbol-solr4/launchers/bundlelists/language-extras/kuromoji/src/main/bundles/list.xml Mon Mar 11 13:18:59 2013
@@ -0,0 +1,58 @@
+<?xml version="1.0" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<bundles>
+  <!--
+   The kuromoji analyzer bundle (an extension to the o.a.s.commons.solr.core module)
+   -->
+  <startLevel level="28"> <!-- commons.solr.core uses start level 27 -->
+    <bundle>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.solr.extras.kuromoji</artifactId>
+      <version>0.12.0-SNAPSHOT</version>
+    </bundle>
+  </startLevel>
+  
+  <!--
+   The kuromoji NLP processing engine
+   -->
+  <startLevel level="35"> <!-- same start level as the other Enhancement Engines -->
+    <bundle>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.engines.kuromoji.nlp</artifactId>
+      <version>0.10.1-SNAPSHOT</version>
+    </bundle>
+  </startLevel>
+
+  <!--
+   The Japanese LabelTokenizer required by the EntityLinkingEngine to compare
+   Tokens in the AnalyzedText with the Labels of the Entities found in the
+   controlled vocabulary.
+   -->
+  <!--
+   The start level needs to be greater than that of the EntityLinkingEngine
+   (o.a.s.enhancer.engines.entitylinking.engine) module
+   -->
+  <startLevel level="36">
+    <bundle>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.kuromoji</artifactId>
+      <version>0.10.1-SNAPSHOT</version>
+    </bundle>
+  </startLevel>
+  
+</bundles>
\ No newline at end of file

Modified: stanbol/branches/stanbol-solr4/launchers/bundlelists/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/bundlelists/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/bundlelists/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/launchers/bundlelists/pom.xml Mon Mar 11 13:18:59 2013
@@ -82,6 +82,7 @@
     <!-- language specific extensions -->
     <module>language-extras/smartcn</module>
     <module>language-extras/paoding</module>
+    <module>language-extras/kuromoji</module>
   </modules>
 
   <profiles>

Modified: stanbol/branches/stanbol-solr4/launchers/bundlelists/stanbolcommons/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/bundlelists/stanbolcommons/src/main/bundles/list.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/bundlelists/stanbolcommons/src/main/bundles/list.xml (original)
+++ stanbol/branches/stanbol-solr4/launchers/bundlelists/stanbolcommons/src/main/bundles/list.xml Mon Mar 11 13:18:59 2013
@@ -134,6 +134,16 @@
       <version>1.8.3_1</version>
     </bundle>
     <bundle>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>13.0.1</version>
+    </bundle>
+    <bundle> <!-- used by Solr4 spatial -->
+      <groupId>org.apache.servicemix.bundles</groupId>
+      <artifactId>org.apache.servicemix.bundles.spatial4j</artifactId>
+      <version>0.3_1</version>
+    </bundle>
+    <bundle>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpcore-osgi</artifactId>
       <version>4.2.3</version>
@@ -229,11 +239,11 @@
       <version>1.15</version>
     </bundle>
     <!-- needed to read data from mime multipart requests -->
-    <bundle>
+    <!-- bundle>
       <groupId>org.apache.clerezza</groupId>
       <artifactId>jaxrs.utils</artifactId>
       <version>0.6-incubating</version>
-    </bundle>
+    </bundle -->
     <!-- still used in many places also it only runs on jersey and the code is not portable across jax-rs implementations -->
     <bundle>
       <groupId>com.sun.jersey.contribs</groupId>

Modified: stanbol/branches/stanbol-solr4/launchers/full/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/launchers/full/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/launchers/full/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/launchers/full/pom.xml Mon Mar 11 13:18:59 2013
@@ -283,19 +283,27 @@
     <!-- Basic Cinese language support (STANBOL-855 -->
     <dependency>
       <groupId>org.apache.stanbol</groupId>
-	  <artifactId>org.apache.stanbol.launchers.bundlelists.languageextras.smartcn</artifactId>
-	  <version>0.10.0-SNAPSHOT</version>
+      <artifactId>org.apache.stanbol.launchers.bundlelists.languageextras.smartcn</artifactId>
+      <version>0.10.0-SNAPSHOT</version>
       <type>partialbundlelist</type>
       <scope>provided</scope>
     </dependency>
-    <dependency>
+    <!-- TODO Paoding does not yet support Solr 4 -->
+    <!-- dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.launchers.bundlelists.languageextras.paoding</artifactId>
       <version>0.10.0-SNAPSHOT</version>
       <type>partialbundlelist</type>
       <scope>provided</scope>
+    </dependency -->
+    <!-- Japanese Language Support -->
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.launchers.bundlelists.languageextras.kuromoji</artifactId>
+      <version>0.10.0-SNAPSHOT</version>
+      <type>partialbundlelist</type>
+      <scope>provided</scope>
     </dependency>
     
-    
   </dependencies>
 </project>

Modified: stanbol/branches/stanbol-solr4/parent/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-solr4/parent/pom.xml?rev=1455131&r1=1455130&r2=1455131&view=diff
==============================================================================
--- stanbol/branches/stanbol-solr4/parent/pom.xml (original)
+++ stanbol/branches/stanbol-solr4/parent/pom.xml Mon Mar 11 13:18:59 2013
@@ -60,7 +60,7 @@
     <jersey-version>1.15</jersey-version>
     <freemarker-version>2.3.19</freemarker-version>
     <owlapi-version>3.3</owlapi-version>
-    <solr-version>3.6.1</solr-version>
+    <solr-version>4.1.0</solr-version>
     <pax-exam-version>2.3.0.M1</pax-exam-version>
     <sourceReleaseAssemblyDescriptor>stanbol-source-release-zip-tar</sourceReleaseAssemblyDescriptor>    
   </properties>
@@ -736,6 +736,13 @@
         <artifactId>httpmime</artifactId>
         <version>4.2.1</version>
       </dependency>
+      
+      <!-- Google Commons -->
+      <dependency>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>13.0.1</version>
+      </dependency>
 
       <!-- Joda Time -->
       <dependency>
@@ -966,12 +973,16 @@
         <groupId>org.apache.solr</groupId>
         <artifactId>solr-core</artifactId>
         <version>${solr-version}</version>
-        <!-- exclusions>
+        <exclusions>
+            <exclusion>
+                <groupId>org.slf4j</groupId>
+                <artifactId>jcl-over-slf4j</artifactId>
+            </exclusion>
             <exclusion>
                 <groupId>org.slf4j</groupId>
                 <artifactId>slf4j-jdk14</artifactId>
             </exclusion>
-        </exclusions -->
+        </exclusions>
       </dependency>
       <!-- dependency>
         <groupId>org.apache.solr</groupId>
@@ -1023,7 +1034,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-analyzers</artifactId>
+      <artifactId>lucene-analyzers-common</artifactId>
       <version>${solr-version}</version>
     </dependency>
     <dependency>
@@ -1038,6 +1049,11 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-codecs</artifactId>
+      <version>${solr-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-misc</artifactId>
       <version>${solr-version}</version>
     </dependency>
@@ -1048,9 +1064,29 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-phonetic</artifactId>
+      <artifactId>lucene-queryparser</artifactId>
+      <version>${solr-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-phonetic</artifactId>
+      <version>${solr-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-stempel</artifactId>
       <version>${solr-version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-smartcn</artifactId>
+      <version>${solr-version}</version>
+    </dependency>
+      <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-analyzers-kuromoji</artifactId>
+        <version>${solr-version}</version>
+      </dependency>
     <!-- Snowball moved to analyzer in 3.1<dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-snowball</artifactId>
@@ -1058,7 +1094,12 @@
     </dependency>  -->
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-spellchecker</artifactId>
+      <artifactId>lucene-suggest</artifactId>
+      <version>${solr-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-sandbox</artifactId>
       <version>${solr-version}</version>
     </dependency>
     <dependency>
@@ -1068,7 +1109,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-icu</artifactId>
+      <artifactId>lucene-analyzers-icu</artifactId>
       <version>${solr-version}</version>
     </dependency>
     <dependency>
@@ -1081,28 +1122,6 @@
         <artifactId>lucene-grouping</artifactId>
         <version>${solr-version}</version>
     </dependency>
-    <!-- other unused Lucene bundes
-    <dependency>
-        <groupId>org.apache.lucene</groupId>
-        <artifactId>lucene-queryparser</artifactId>
-        <version>${solr-version}</version>
-    </dependency>
-    <dependency>
-        <groupId>org.apache.lucene</groupId>
-        <artifactId>lucene-smartcn</artifactId>
-        <version>${solr-version}</version>
-    </dependency>
-    <dependency>
-        <groupId>org.apache.lucene</groupId>
-        <artifactId>lucene-stempel</artifactId>
-        <version>${solr-version}</version>
-    </dependency>
-    <dependency>
-        <groupId>org.apache.lucene</groupId>
-        <artifactId>lucene-kuromoji</artifactId>
-        <version>${solr-version}</version>
-    </dependency>
-    -->
     <!-- Other Solr/Lucene dependendies -->
     <!-- StAX Parser (used by Solr/Lucene) -->
     <!-- dependency> stay api is included in java 1.6
@@ -1121,17 +1140,6 @@
         </exclusion>
       </exclusions>
     </dependency>
-    <!-- dependency>
-      <groupId>stax</groupId>
-      <artifactId>stax-api</artifactId>
-      <version>1.0.1</version>
-      <scope>runtime</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.codehaus.woodstox</groupId>
-      <artifactId>wstx-asl</artifactId>
-      <version>3.2.7</version>
-    </dependency -->
     <dependency>
       <groupId>org.apache.servicemix.bundles</groupId>
       <artifactId>org.apache.servicemix.bundles.regexp</artifactId>
@@ -1147,6 +1155,21 @@
       <artifactId>portlet-api</artifactId>
       <version>2.0</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <version>3.4.5</version>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
     <!-- END Solr/Lucene dependencies -->
 
     <!-- JDom -->