You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/06/15 23:19:03 UTC
svn commit: r784984 - in /lucene/java/trunk/src/java/org/apache/lucene:
analysis/standard/StandardAnalyzer.java util/Version.java
Author: mikemccand
Date: Mon Jun 15 21:19:03 2009
New Revision: 784984
URL: http://svn.apache.org/viewvc?rev=784984&view=rev
Log:
LUCENE-1684: add matchVersion to StandardAnalyzer, and improve defaults if version is 2.9+
Added:
lucene/java/trunk/src/java/org/apache/lucene/util/Version.java
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=784984&r1=784983&r2=784984&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Mon Jun 15 21:19:03 2009
@@ -18,6 +18,7 @@
*/
import org.apache.lucene.analysis.*;
+import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
@@ -26,12 +27,24 @@
/**
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
+ * LowerCaseFilter} and {@link StopFilter}, using a list of
+ * English stop words.
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StandardAnalyzer:
+ * <ul>
+ * <li> As of 2.9, StopFilter preserves position
+ * increments by default
+ * <li> As of 2.9, Tokens incorrectly identified as acronyms
+ * are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+ * </ul>
*
* @version $Id$
*/
public class StandardAnalyzer extends Analyzer {
private Set stopSet;
+ private Version matchVersion;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
@@ -94,87 +107,92 @@
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS}).
- * @deprecated Use {@link #StandardAnalyzer(boolean, String[])},
- * passing in null for the stop words, instead */
+ * @deprecated Use {@link #StandardAnalyzer(Version)}
+ * instead. */
public StandardAnalyzer() {
- this(STOP_WORDS);
+ this(Version.LUCENE_24, STOP_WORDS);
+ }
+
+ /** Builds an analyzer with the default stop words ({@link
+ * #STOP_WORDS}).
+ * @param matchVersion Lucene version to match; see
+ * <a href="#version">above</a>
+ */
+ public StandardAnalyzer(Version matchVersion) {
+ this(matchVersion, STOP_WORDS);
}
/** Builds an analyzer with the given stop words.
- * @deprecated Use {@link #StandardAnalyzer(boolean, Set)}
+ * @deprecated Use {@link #StandardAnalyzer(Version, Set)}
* instead */
public StandardAnalyzer(Set stopWords) {
- stopSet = stopWords;
- useDefaultStopPositionIncrements = true;
+ this(Version.LUCENE_24, stopWords);
}
/** Builds an analyzer with the given stop words.
- * @param enableStopPositionIncrements See {@link
- * StopFilter#setEnablePositionIncrements}
+ * @param matchVersion Lucene version to match; see
+ * <a href="#version">above</a>
* @param stopWords stop words */
- public StandardAnalyzer(boolean enableStopPositionIncrements, Set stopWords) {
+ public StandardAnalyzer(Version matchVersion, Set stopWords) {
stopSet = stopWords;
- this.enableStopPositionIncrements = enableStopPositionIncrements;
+ init(matchVersion);
}
/** Builds an analyzer with the given stop words.
- * @deprecated Use {@link #StandardAnalyzer(boolean,
+ * @deprecated Use {@link #StandardAnalyzer(Version,
* String[])} instead */
public StandardAnalyzer(String[] stopWords) {
- if (stopWords == null) {
- stopWords = STOP_WORDS;
- }
- stopSet = StopFilter.makeStopSet(stopWords);
- useDefaultStopPositionIncrements = true;
+ this(Version.LUCENE_24, stopWords);
}
/** Builds an analyzer with the given stop words.
- * @param enableStopPositionIncrements See {@link
- * StopFilter#setEnablePositionIncrements}
+ * @param matchVersion Lucene version to match; see
+ * <a href="#version">above</a>
* @param stopWords Array of stop words */
- public StandardAnalyzer(boolean enableStopPositionIncrements, String[] stopWords) {
+ public StandardAnalyzer(Version matchVersion, String[] stopWords) {
+ if (stopWords == null) {
+ stopWords = STOP_WORDS;
+ }
stopSet = StopFilter.makeStopSet(stopWords);
- this.enableStopPositionIncrements = enableStopPositionIncrements;
+ init(matchVersion);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
- * @deprecated Use {@link #StandardAnalyzer(boolean, File)}
+ * @deprecated Use {@link #StandardAnalyzer(Version, File)}
* instead
*/
public StandardAnalyzer(File stopwords) throws IOException {
- stopSet = WordlistLoader.getWordSet(stopwords);
- useDefaultStopPositionIncrements = true;
+ this(Version.LUCENE_24, stopwords);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
- * @param enableStopPositionIncrements See {@link
- * StopFilter#setEnablePositionIncrements}
+ * @param matchVersion Lucene version to match; see
+ * <a href="#version">above</a>
* @param stopwords File to read stop words from */
- public StandardAnalyzer(boolean enableStopPositionIncrements, File stopwords) throws IOException {
+ public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
- this.enableStopPositionIncrements = enableStopPositionIncrements;
+ init(matchVersion);
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
- * @deprecated Use {@link #StandardAnalyzer(boolean, Reader)}
+ * @deprecated Use {@link #StandardAnalyzer(Version, Reader)}
* instead
*/
public StandardAnalyzer(Reader stopwords) throws IOException {
- stopSet = WordlistLoader.getWordSet(stopwords);
- useDefaultStopPositionIncrements = true;
+ this(Version.LUCENE_24, stopwords);
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
- * @param enableStopPositionIncrements See {@link
- * StopFilter#setEnablePositionIncrements}
+ * @param matchVersion Lucene version to match; see
+ * <a href="#version">above</a>
* @param stopwords Reader to read stop words from */
- public StandardAnalyzer(boolean enableStopPositionIncrements, Reader stopwords) throws IOException {
+ public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
- this.enableStopPositionIncrements = enableStopPositionIncrements;
+ init(matchVersion);
}
/**
@@ -186,9 +204,8 @@
* @deprecated Remove in 3.X and make true the only valid value
*/
public StandardAnalyzer(boolean replaceInvalidAcronym) {
- this(STOP_WORDS);
+ this(Version.LUCENE_24, STOP_WORDS);
this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
}
/**
@@ -200,9 +217,8 @@
* @deprecated Remove in 3.X and make true the only valid value
*/
public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(stopwords);
+ this(Version.LUCENE_24, stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
}
/**
@@ -214,9 +230,8 @@
* @deprecated Remove in 3.X and make true the only valid value
*/
public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(stopwords);
+ this(Version.LUCENE_24, stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
}
/**
@@ -229,9 +244,8 @@
* @deprecated Remove in 3.X and make true the only valid value
*/
public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(stopwords);
+ this(Version.LUCENE_24, stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
}
/**
@@ -243,9 +257,17 @@
* @deprecated Remove in 3.X and make true the only valid value
*/
public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(stopwords);
+ this(Version.LUCENE_24, stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
+ }
+
+ private final void init(Version matchVersion) {
+ this.matchVersion = matchVersion;
+ if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+ enableStopPositionIncrements = true;
+ } else {
+ useDefaultStopPositionIncrements = true;
+ }
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -289,7 +311,8 @@
public int getMaxTokenLength() {
return maxTokenLength;
}
-
+
+ /** @deprecated Use {@link #tokenStream} instead */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
Added: lucene/java/trunk/src/java/org/apache/lucene/util/Version.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/Version.java?rev=784984&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/Version.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/Version.java Mon Jun 15 21:19:03 2009
@@ -0,0 +1,58 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.Serializable;
+
+/**
+ * Used by certain classes to match version compatibility
+ * across releases of Lucene.
+ */
+public final class Version extends Parameter implements Serializable {
+
+ /** Use this to get the latest & greatest settings, bug
+ * fixes, etc, for Lucene.
+ *
+ * <p><b>WARNING</b>: if you use this setting, and then
+ * upgrade to a newer release of Lucene, sizable changes
+ * may happen. If precise back compatibility is important
+ * then you should instead explicitly specify an actual
+ * version.
+ */
+ public static final Version LUCENE_CURRENT = new Version("LUCENE_CURRENT", 0);
+
+ /** Match settings and bugs in Lucene's 2.4 release.
+ * @deprecated This will be removed in 3.0 */
+ public static final Version LUCENE_24 = new Version("LUCENE_24", 2400);
+
+ /** Match settings and bugs in Lucene's 2.9 release.
+ * @deprecated This will be removed in 3.0 */
+ public static final Version LUCENE_29 = new Version("LUCENE_29", 2900);
+
+ private final int v;
+
+ public Version(String name, int v) {
+ super(name);
+ this.v = v;
+ }
+
+ public boolean onOrAfter(Version other) {
+ return v == 0 || v >= other.v;
+ }
+}
\ No newline at end of file