You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2013/08/09 11:03:22 UTC

svn commit: r1512208 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ dev-tools/maven/ lucene/ lucene/analysis/ lucene/analysis/morfologik/ lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/ lucene/analysis/morfologik/src/test...

Author: dweiss
Date: Fri Aug  9 09:03:20 2013
New Revision: 1512208

URL: http://svn.apache.org/r1512208
Log:
LUCENE-5089: Update to Morfologik 1.6.0 (Backport from trunk)
SOLR-5126: Update to Morfologik 1.7.1.
SOLR-5126: Update to Carrot2 3.8.0.


Added:
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/lucene/licenses/morfologik-fsa-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/lucene/licenses/morfologik-polish-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/lucene/licenses/morfologik-stemming-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/attributes-binder-1.2.0.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/attributes-binder-1.2.0.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/carrot2-mini-3.8.0.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/carrot2-mini-3.8.0.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/hppc-0.5.2.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/hppc-0.5.2.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/morfologik-fsa-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/morfologik-polish-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-1.7.1.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/morfologik-stemming-1.7.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/simple-xml-2.7.jar.sha1
      - copied unchanged from r1512203, lucene/dev/trunk/solr/licenses/simple-xml-2.7.jar.sha1
Removed:
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/attributes-binder-1.0.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/carrot2-mini-3.6.2.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/hppc-0.4.1.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-1.5.5.jar.sha1
    lucene/dev/branches/branch_4x/solr/licenses/simple-xml-2.6.4.jar.sha1
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/maven/pom.xml.template
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/ivy.xml
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-NOTICE.txt
    lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/analysis-extras/ivy.xml
    lucene/dev/branches/branch_4x/solr/contrib/clustering/ivy.xml
    lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
    lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
    lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
    lucene/dev/branches/branch_4x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-LICENSE-BSD.txt
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-NOTICE.txt
    lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-LICENSE-BSD.txt

Modified: lucene/dev/branches/branch_4x/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/dev-tools/maven/pom.xml.template?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/branches/branch_4x/dev-tools/maven/pom.xml.template Fri Aug  9 09:03:20 2013
@@ -407,12 +407,12 @@
       <dependency>
         <groupId>org.carrot2</groupId>
         <artifactId>carrot2-mini</artifactId>
-        <version>3.6.2</version>
+        <version>3.8.0</version>
       </dependency>
       <dependency>
         <groupId>org.carrot2</groupId>
         <artifactId>morfologik-polish</artifactId>
-        <version>1.5.5</version>
+        <version>1.7.1</version>
       </dependency>
       <dependency>
         <groupId>org.codehaus.woodstox</groupId>

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri Aug  9 09:03:20 2013
@@ -131,6 +131,10 @@ Changes in backwards compatibility polic
   CheckIndex.fixIndex(Status). If you used to pass a codec to this method, just
   remove it from the arguments. (Adrien Grand)
 
+* LUCENE-5089, SOLR-5126: Update to Morfologik 1.7.1. MorfologikAnalyzer and MorfologikFilter 
+  no longer support multiple "dictionaries" as there is only one dictionary available.
+  (Dawid Weiss)
+
 ======================= Lucene 4.4.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/ivy.xml?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/ivy.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/ivy.xml Fri Aug  9 09:03:20 2013
@@ -19,9 +19,9 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="analyzers-morfologik"/>
     <dependencies>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.7.1" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.7.1" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.7.1" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java Fri Aug  9 09:03:20 2013
@@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standa
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 
-import morfologik.stemming.PolishStemmer.DICTIONARY;
-
 /**
  * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
  * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
  */
 public class MorfologikAnalyzer extends Analyzer {
-
-  private final DICTIONARY dictionary;
   private final Version version;
 
   /**
-   * Builds an analyzer for a given PolishStemmer.DICTIONARY enum.
-   * 
-   * @param vers
-   *          lucene compatibility version
-   * @param dict
-   *          A constant specifying which dictionary to choose. See the
-   *          Morfologik documentation for details or use the default.
-   */
-  public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) {
-    this.version = vers;
-    this.dictionary = dict;
-  }
-
-  /**
-   * Builds an analyzer for an original MORFOLOGIK dictionary.
+   * Builds an analyzer with the default Morfologik's dictionary (polimorf).
    * 
-   * @param vers         lucene compatibility version
+   * @param version
+   *          Lucene compatibility version
    */
-  public MorfologikAnalyzer(final Version vers) {
-    this(vers, DICTIONARY.MORFOLOGIK);
+  public MorfologikAnalyzer(final Version version) {
+    this.version = version;
   }
 
   /**
@@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends 
     final Tokenizer src = new StandardTokenizer(this.version, reader);
     
     return new TokenStreamComponents(
-      src,
-      new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
+        src, 
+        new MorfologikFilter(new StandardFilter(this.version, src), this.version));
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java Fri Aug  9 09:03:20 2013
@@ -20,9 +20,9 @@ package org.apache.lucene.analysis.morfo
 
 import java.io.IOException;
 import java.util.*;
+import java.util.regex.Pattern;
 
 import morfologik.stemming.*;
-import morfologik.stemming.PolishStemmer.DICTIONARY;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -33,10 +33,11 @@ import org.apache.lucene.analysis.util.C
 import org.apache.lucene.util.*;
 
 /**
- * {@link TokenFilter} using Morfologik library.
+ * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
+ * morphosyntactic (POS) tokens. Applies to Polish only.  
  *
- * MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
- * annotations for produced lemmas. See the Morfologik documentation for details.
+ * <p>MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
+ * annotations for produced lemmas. See the Morfologik documentation for details.</p>
  * 
  * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
  */
@@ -60,13 +61,10 @@ public class MorfologikFilter extends To
   private int lemmaListIndex;
 
   /**
-   * Builds a filter for given PolishStemmer.DICTIONARY enum.
-   * 
    * @param in   input token stream
-   * @param dict PolishStemmer.DICTIONARY enum
    * @param version Lucene version compatibility for lowercasing.
    */
-  public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
+  public MorfologikFilter(final TokenStream in, final Version version) {
     super(in);
     this.input = in;
     
@@ -75,7 +73,7 @@ public class MorfologikFilter extends To
     ClassLoader cl = me.getContextClassLoader();
     try {
       me.setContextClassLoader(PolishStemmer.class.getClassLoader());
-      this.stemmer = new PolishStemmer(dict);
+      this.stemmer = new PolishStemmer();
       this.charUtils = CharacterUtils.getInstance(version);
       this.lemmaList = Collections.emptyList();
     } finally {
@@ -83,44 +81,30 @@ public class MorfologikFilter extends To
     }  
   }
 
+  /**
+   * A pattern used to split lemma forms.
+   */
+  private final static Pattern lemmaSplitter = Pattern.compile("\\+|\\|");
+
   private void popNextLemma() {
-    // Collect all tags for the next unique lemma.
-    CharSequence currentStem;
-    int tags = 0;
-    do {
-      final WordData lemma = lemmaList.get(lemmaListIndex++);
-      currentStem = lemma.getStem();
-      final CharSequence tag = lemma.getTag();
-      if (tag != null) {
-        if (tagsList.size() <= tags) {
+    // One tag (concatenated) per lemma.
+    final WordData lemma = lemmaList.get(lemmaListIndex++);
+    termAtt.setEmpty().append(lemma.getStem());
+    CharSequence tag = lemma.getTag();
+    if (tag != null) {
+      String[] tags = lemmaSplitter.split(tag.toString());
+      for (int i = 0; i < tags.length; i++) {
+        if (tagsList.size() <= i) {
           tagsList.add(new StringBuilder());
         }
-
-        final StringBuilder buffer = tagsList.get(tags++);  
+        StringBuilder buffer = tagsList.get(i);
         buffer.setLength(0);
-        buffer.append(lemma.getTag());
-      }
-    } while (lemmaListIndex < lemmaList.size() &&
-             equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
-
-    // Set the lemma's base form and tags as attributes.
-    termAtt.setEmpty().append(currentStem);
-    tagsAtt.setTags(tagsList.subList(0, tags));
-  }
-
-  /**
-   * Compare two char sequences for equality. Assumes non-null arguments. 
-   */
-  private static final boolean equalCharSequences(CharSequence s1, CharSequence s2) {
-    int len1 = s1.length();
-    int len2 = s2.length();
-    if (len1 != len2) return false;
-    for (int i = len1; --i >= 0;) {
-      if (s1.charAt(i) != s2.charAt(i)) { 
-        return false; 
+        buffer.append(tags[i]);
       }
+      tagsAtt.setTags(tagsList.subList(0, tags.length));
+    } else {
+      tagsAtt.setTags(Collections.<StringBuilder> emptyList());
     }
-    return true;
   }
 
   /**

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Fri Aug  9 09:03:20 2013
@@ -17,11 +17,8 @@ package org.apache.lucene.analysis.morfo
  * limitations under the License.
  */
 
-import java.util.Arrays;
-import java.util.Locale;
 import java.util.Map;
-
-import morfologik.stemming.PolishStemmer.DICTIONARY;
+import java.util.logging.Logger;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -32,39 +29,31 @@ import org.apache.lucene.analysis.util.T
  * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /&gt;
+ *     &lt;filter class="solr.MorfologikFilterFactory" /&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  * 
- * <p>Any of Morfologik dictionaries can be used, these are at the moment:
- * <code>MORFOLOGIK</code> (Morfologik's original dictionary),
- * <code>MORFEUSZ</code> (Morfeusz-SIAT),
- * <code>COMBINED</code> (both of the dictionaries above, combined).
- * 
  * @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
  */
 public class MorfologikFilterFactory extends TokenFilterFactory {
-  /** Dictionary. */
-  private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
-  
   /** Schema attribute. */
+  @Deprecated
   public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
-  
+
   /** Creates a new MorfologikFilterFactory */
   public MorfologikFilterFactory(Map<String,String> args) {
     super(args);
+
+    // Be specific about no-longer-supported dictionary attribute.
     String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
     if (dictionaryName != null && !dictionaryName.isEmpty()) {
-      try {
-        DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
-        assert dictionary != null;
-        this.dictionary = dictionary;
-      } catch (IllegalArgumentException e) {
-        throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
-            + "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "  
-            + dictionaryName);
-      }
+      // We do not throw a hard exception on 4.x branch to keep it backward compatible.
+      // Emit a warning though.
+      Logger.getLogger(MorfologikFilterFactory.class.getName())
+        .warning("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
+          + "longer supported (Morfologik has one dictionary): " + dictionaryName);
     }
+
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -72,6 +61,6 @@ public class MorfologikFilterFactory ext
 
   @Override
   public TokenStream create(TokenStream ts) {
-    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
+    return new MorfologikFilter(ts, luceneMatchVersion);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java Fri Aug  9 09:03:20 2013
@@ -23,9 +23,9 @@ import java.util.List;
 import org.apache.lucene.util.Attribute;
 
 /** 
- * Morfologik dictionaries provide morphosyntactic annotations for
+ * Morfologik provides morphosyntactic annotations for
  * surface forms. For the exact format and description of these,
- * see the project's documentation (annotations vary by dictionary!).
+ * see the project's documentation.
  */
 public interface MorphosyntacticTagsAttribute extends Attribute {
   /** 
@@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttr
   public void setTags(List<StringBuilder> tags);
 
   /** 
-   * Returns the POS tag of the term.
+   * Returns the POS tag of the term. A single word may have multiple POS tags, 
+   * depending on the interpretation (context disambiguation is typically needed
+   * to determine which particular tag is appropriate).  
    */
   public List<StringBuilder> getTags();
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Fri Aug  9 09:03:20 2013
@@ -19,10 +19,9 @@ package org.apache.lucene.analysis.morfo
 
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.TreeSet;
 
-import morfologik.stemming.PolishStemmer.DICTIONARY;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
@@ -66,10 +65,22 @@ public class TestMorfologikAnalyzer exte
     assertAnalyzesToReuse(
         a,
         "T. Gl\u00FCcksberg",
-        new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
-        new int[] { 0, 0, 0, 3  },
-        new int[] { 1, 1, 1, 13 },
-        new int[] { 1, 0, 0, 1  });
+        new String[] { "tom", "tona", "Gl\u00FCcksberg" },
+        new int[] { 0, 0, 3  },
+        new int[] { 1, 1, 13 },
+        new int[] { 1, 0, 1  });
+  }
+
+  @SuppressWarnings("unused")
+  private void dumpTokens(String input) throws IOException {
+    TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
+    ts.reset();
+
+    MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
+    CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
+    while (ts.incrementToken()) {
+      System.out.println(charTerm.toString() + " => " + attribute.getTags());
+    }
   }
 
   /** Test reuse of MorfologikFilter with leftover stems. */
@@ -157,9 +168,8 @@ public class TestMorfologikAnalyzer exte
   /** */
   public final void testKeywordAttrTokens() throws IOException {
     final Version version = TEST_VERSION_CURRENT;
-    final DICTIONARY dictionary = DICTIONARY.COMBINED;
 
-    Analyzer a = new MorfologikAnalyzer(version, dictionary) {
+    Analyzer a = new MorfologikAnalyzer(version) {
       @Override
       protected TokenStreamComponents createComponents(String field, Reader reader) {
         final CharArraySet keywords = new CharArraySet(version, 1, false);
@@ -168,7 +178,7 @@ public class TestMorfologikAnalyzer exte
         final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
         TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
         result = new SetKeywordMarkerFilter(result, keywords);
-        result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT); 
+        result = new MorfologikFilter(result, TEST_VERSION_CURRENT); 
 
         return new TokenStreamComponents(src, result);
       }

Modified: lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, 
@@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIA
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)
 
-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
+Version: 2.0 PoliMorf
+Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.
 
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
 
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+1. Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution. 
 
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
---
-
-BSD-licensed dictionary of Polish (SGJP)
-http://sgjp.pl/morfeusz/
-
-Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
-	    	 Marcin Woliński, Robert Wołosz
-
-All rights reserved.
-
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Modified: lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-NOTICE.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-NOTICE.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-NOTICE.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,3 @@
 
-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
-(http://morfologik.blogspot.com/)
-
-This product includes data from BSD-licensed dictionary of Polish (SGJP)
-(http://sgjp.pl/morfeusz/)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
+(http://morfologik.blogspot.com/)
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, 
@@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIA
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Fri Aug  9 09:03:20 2013
@@ -25,7 +25,7 @@ $Id$
 Versions of Major Components
 ---------------------
 Apache Tika 1.4
-Carrot2 3.6.2
+Carrot2 3.8.0
 Velocity 1.7 and Velocity Tools 2.0
 Apache UIMA 2.3.1
 Apache ZooKeeper 3.4.5
@@ -48,6 +48,9 @@ Detailed Change List
 New Features
 ----------------------
 
+* SOLR-5126: Update Carrot2 clustering to version 3.8.0, update Morfologik 
+  to version 1.7.1 (Dawid Weiss)
+
 * SOLR-2345: Enhanced geodist() to work with an RPT field, provided that the
   field is referenced via 'sfield' and the query point is constant.
   (David Smiley)

Modified: lucene/dev/branches/branch_4x/solr/contrib/analysis-extras/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/analysis-extras/ivy.xml?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/analysis-extras/ivy.xml (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/analysis-extras/ivy.xml Fri Aug  9 09:03:20 2013
@@ -20,9 +20,9 @@
     <info organisation="org.apache.solr" module="analysis-extras"/>
     <dependencies>
       <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.7.1" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.7.1" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.7.1" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/branch_4x/solr/contrib/clustering/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/clustering/ivy.xml?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/clustering/ivy.xml (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/clustering/ivy.xml Fri Aug  9 09:03:20 2013
@@ -19,14 +19,25 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.solr" module="clustering"/>
     <dependencies>
-      <dependency org="org.carrot2" name="carrot2-mini" rev="3.6.2" transitive="false"/>
-      <dependency org="org.carrot2.attributes" name="attributes-binder" rev="1.0.1" transitive="false"/>
-      <dependency org="com.carrotsearch" name="hppc" rev="0.4.1" transitive="false"/>
+      <dependency org="org.carrot2" name="carrot2-mini" rev="3.8.0" transitive="false"/>
+
+      <dependency org="com.carrotsearch" name="hppc" rev="0.5.2" transitive="false"/>
+      <dependency org="org.carrot2.attributes" name="attributes-binder" rev="1.2.0" transitive="false"/>
+      <dependency org="org.simpleframework" name="simple-xml" rev="2.7" transitive="false"/>
+
+      <dependency org="org.apache.mahout" name="mahout-math" rev="0.6" transitive="false"/>
+      <dependency org="org.apache.mahout" name="mahout-collections" rev="1.0" transitive="false"/>
+
       <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="1.7.4" transitive="false"/>
       <dependency org="org.codehaus.jackson" name="jackson-mapper-asl" rev="1.7.4" transitive="false"/>
-      <dependency org="org.apache.mahout" name="mahout-collections" rev="1.0" transitive="false"/>
-      <dependency org="org.apache.mahout" name="mahout-math" rev="0.6" transitive="false"/>
-      <dependency org="org.simpleframework" name="simple-xml" rev="2.6.4" transitive="false"/>
+
+      <!--
+      Included as part of Solr's environment.
+
+      com.google.guava:guava:jar:14.0.1:compile
+      commons-lang:commons-lang:jar:2.6:compile
+      -->
+
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java Fri Aug  9 09:03:20 2013
@@ -37,8 +37,7 @@ public abstract class DocumentClustering
   public abstract NamedList cluster(SolrParams solrParams);
 
   /**
-   *  Experimental.  Subject to change before the next release
-   *
+   * Experimental.  Subject to change before the next release.
    *
    * Cluster the set of docs.  Clustering of documents is often an expensive task that can take a long time.
    * @param docs The docs to cluster.  If null, cluster all docs as in {@link #cluster(org.apache.solr.common.params.SolrParams)}

Modified: lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Fri Aug  9 09:03:20 2013
@@ -77,6 +77,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.io.Closeables;
+import com.google.common.io.Closer;
 
 /**
  * Search results clustering engine based on Carrot2 clustering algorithms.
@@ -140,7 +141,13 @@ public class CarrotClusteringEngine exte
             + ". Using the default " + resource + " from Carrot JAR.");          
         return new IResource[] {};
       } finally {
-        if (resourceStream != null) Closeables.closeQuietly(resourceStream);
+        if (resourceStream != null) {
+          try {
+            resourceStream.close();
+          } catch (IOException e) {
+            // Deliberately ignored: closing the stream is best-effort (same as closeQuietly).
+          }
+        }
       }
 
       log.info("Loaded Solr resource: " + resourceName);

Modified: lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java Fri Aug  9 09:03:20 2013
@@ -52,7 +52,7 @@ public class ClusteringComponentTest ext
     SolrRequestHandler handler = core.getRequestHandler("standard");
     SolrQueryResponse rsp;
     rsp = new SolrQueryResponse();
-    rsp.add("responseHeader", new SimpleOrderedMap());
+    rsp.add("responseHeader", new SimpleOrderedMap<Object>());
     SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
     handler.handleRequest(req, rsp);
     NamedList values = rsp.getValues();
@@ -70,7 +70,7 @@ public class ClusteringComponentTest ext
     handler = core.getRequestHandler("docClustering");
 
     rsp = new SolrQueryResponse();
-    rsp.add("responseHeader", new SimpleOrderedMap());
+    rsp.add("responseHeader", new SimpleOrderedMap<Object>());
     req = new LocalSolrQueryRequest(core, params);
     handler.handleRequest(req, rsp);
     values = rsp.getValues();

Modified: lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java Fri Aug  9 09:03:20 2013
@@ -15,7 +15,6 @@ package org.apache.solr.handler.clusteri
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import java.util.Collections;
 import java.util.List;
 
 import org.carrot2.core.Cluster;

Modified: lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, 
@@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIA
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)
 
-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
+Version: 2.0 PoliMorf
+Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.
 
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
 
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+1. Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution. 
 
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
---
-
-BSD-licensed dictionary of Polish (SGJP)
-http://sgjp.pl/morfeusz/
-
-Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
-	    	 Marcin Woliński, Robert Wołosz
-
-All rights reserved.
-
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Modified: lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-NOTICE.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-NOTICE.txt (original)
+++ lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-NOTICE.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,3 @@
 
-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
-(http://morfologik.blogspot.com/)
-
-This product includes data from BSD-licensed dictionary of Polish (SGJP)
-(http://sgjp.pl/morfeusz/)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
+(http://morfologik.blogspot.com/)
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-LICENSE-BSD.txt?rev=1512208&r1=1512207&r2=1512208&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-LICENSE-BSD.txt Fri Aug  9 09:03:20 2013
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, 
@@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIA
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file