You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/12/28 17:48:17 UTC
svn commit: r1225227 - in /lucene/dev/branches/branch_3x: ./
dev-tools/eclipse/ lucene/ lucene/contrib/ lucene/contrib/analyzers/
lucene/contrib/analyzers/phonetic/
lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/
lucene/...
Author: rmuir
Date: Wed Dec 28 16:48:15 2011
New Revision: 1225227
URL: http://svn.apache.org/viewvc?rev=1225227&view=rev
Log:
SOLR-2982: add Beider-Morse phonetic filter
Added:
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/ (props changed)
- copied from r1225211, lucene/dev/trunk/modules/analysis/phonetic/
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
- copied unchanged from r1225211, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestBeiderMorseFilterFactory.java
- copied unchanged from r1225211, lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestBeiderMorseFilterFactory.java
lucene/dev/branches/branch_3x/solr/lib/commons-codec-1.6.jar
- copied unchanged from r1225211, lucene/dev/trunk/solr/lib/commons-codec-1.6.jar
Removed:
lucene/dev/branches/branch_3x/solr/lib/commons-codec-1.5.jar
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/dev-tools/eclipse/dot.classpath
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/build.xml
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/build.xml
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/build.xml
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/contrib-build.xml
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/common-build.xml
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilter.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java
lucene/dev/branches/branch_3x/solr/lib/commons-codec-NOTICE.txt
Modified: lucene/dev/branches/branch_3x/dev-tools/eclipse/dot.classpath
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/dev-tools/eclipse/dot.classpath?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/dev-tools/eclipse/dot.classpath (original)
+++ lucene/dev/branches/branch_3x/dev-tools/eclipse/dot.classpath Wed Dec 28 16:48:15 2011
@@ -6,6 +6,8 @@
<classpathentry kind="src" path="lucene/contrib/analyzers/common/src/java"/>
<classpathentry kind="src" path="lucene/contrib/analyzers/common/src/resources"/>
<classpathentry kind="src" path="lucene/contrib/analyzers/common/src/test"/>
+ <classpathentry kind="src" path="lucene/contrib/analyzers/phonetic/src/java"/>
+ <classpathentry kind="src" path="lucene/contrib/analyzers/phonetic/src/test"/>
<classpathentry kind="src" path="lucene/contrib/analyzers/smartcn/src/java"/>
<classpathentry kind="src" path="lucene/contrib/analyzers/smartcn/src/resources"/>
<classpathentry kind="src" path="lucene/contrib/analyzers/smartcn/src/test"/>
@@ -83,6 +85,7 @@
<classpathentry kind="lib" path="lucene/lib/ant-1.7.1.jar"/>
<classpathentry kind="lib" path="lucene/lib/ant-junit-1.7.1.jar"/>
<classpathentry kind="lib" path="lucene/lib/junit-4.7.jar"/>
+ <classpathentry kind="lib" path="lucene/contrib/analyzers/phonetic/lib/commons-codec-1.6.jar"/>
<classpathentry kind="lib" path="lucene/contrib/benchmark/lib/commons-beanutils-1.7.0.jar"/>
<classpathentry kind="lib" path="lucene/contrib/benchmark/lib/commons-collections-3.1.jar"/>
<classpathentry kind="lib" path="lucene/contrib/benchmark/lib/commons-compress-1.2.jar"/>
@@ -92,7 +95,6 @@
<classpathentry kind="lib" path="lucene/contrib/icu/lib/icu4j-4_8_1_1.jar"/>
<classpathentry kind="lib" path="lucene/contrib/queries/lib/jakarta-regexp-1.4.jar"/>
<classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r1099557.jar"/>
- <classpathentry kind="lib" path="solr/lib/commons-codec-1.5.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-fileupload-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-httpclient-3.1.jar"/>
Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Wed Dec 28 16:48:15 2011
@@ -255,6 +255,7 @@
<!-- Also remember to keep site.xml in sync. -->
<packageset dir="contrib/analyzers/common/src/java"/>
+ <packageset dir="contrib/analyzers/phonetic/src/java"/>
<packageset dir="contrib/analyzers/smartcn/src/java"/>
<packageset dir="contrib/analyzers/stempel/src/java"/>
<packageset dir="contrib/benchmark/src/java"/>
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/build.xml?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/build.xml Wed Dec 28 16:48:15 2011
@@ -32,6 +32,10 @@
<ant dir="common" />
</target>
+ <target name="phonetic">
+ <ant dir="phonetic" />
+ </target>
+
<target name="smartcn">
<ant dir="smartcn" />
</target>
@@ -40,25 +44,29 @@
<ant dir="stempel" />
</target>
- <target name="default" depends="common,smartcn,stempel" />
+ <target name="default" depends="common,phonetic,smartcn,stempel" />
<target name="clean">
<ant dir="common" target="clean" />
+ <ant dir="phonetic" target="clean" />
<ant dir="smartcn" target="clean" />
<ant dir="stempel" target="clean" />
</target>
<target name="compile-core">
<ant dir="common" target="compile-core" />
+ <ant dir="phonetic" target="compile-core" />
<ant dir="smartcn" target="compile-core" />
<ant dir="stempel" target="compile-core" />
</target>
<target name="compile-test">
<ant dir="common" target="compile-test" />
+ <ant dir="phonetic" target="compile-test" />
<ant dir="smartcn" target="compile-test" />
<ant dir="stempel" target="compile-test" />
</target>
<target name="test">
<ant dir="common" target="test" />
+ <ant dir="phonetic" target="test" />
<ant dir="smartcn" target="test" />
<ant dir="stempel" target="test" />
</target>
@@ -67,18 +75,21 @@
<target name="dist-maven" depends="default">
<ant dir="common" target="dist-maven" />
+ <ant dir="phonetic" target="dist-maven" />
<ant dir="smartcn" target="dist-maven" />
<ant dir="stempel" target="dist-maven" />
</target>
<target name="javadocs">
<ant dir="common" target="javadocs" />
+ <ant dir="phonetic" target="javadocs" />
<ant dir="smartcn" target="javadocs" />
<ant dir="stempel" target="javadocs" />
</target>
<target name="javadocs-index.html">
<ant dir="common" target="javadocs-index.html" />
+ <ant dir="phonetic" target="javadocs-index.html" />
<ant dir="smartcn" target="javadocs-index.html" />
<ant dir="stempel" target="javadocs-index.html" />
</target>
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/build.xml?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/build.xml Wed Dec 28 16:48:15 2011
@@ -17,14 +17,14 @@
limitations under the License.
-->
-<project name="analyzers-phonetic" default="default">
+<project name="phonetic" default="default">
<description>
Provides phonetic encoding support via Apache Commons Codec.
</description>
- <property name="build.dir" location="../build/phonetic" />
- <property name="dist.dir" location="../dist/phonetic" />
+ <property name="build.dir" location="../../../build/contrib/analyzers/phonetic" />
+ <property name="dist.dir" location="../../../dist/contrib/analyzers/phonetic" />
<path id="additional.dependencies">
<fileset dir="lib" includes="commons-codec-*.jar"/>
@@ -35,12 +35,10 @@
refid="additional.dependencies"
/>
- <import file="../../../lucene/contrib/contrib-build.xml"/>
+ <import file="../../contrib-build.xml"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
-
- <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
</project>
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Wed Dec 28 16:48:15 2011
@@ -56,7 +56,7 @@ public final class BeiderMorseFilter ext
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
- * Calls {@link #BeiderMorseFilter(TokenStream, PhoneticEngine, LanguageSet)
+ * Calls {@link #BeiderMorseFilter(TokenStream, PhoneticEngine, Languages.LanguageSet)
* BeiderMorseFilter(input, engine, null)}
*/
public BeiderMorseFilter(TokenStream input, PhoneticEngine engine) {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java Wed Dec 28 16:48:15 2011
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-public final class DoubleMetaphoneFilter extends TokenFilter {
+public class DoubleMetaphoneFilter extends TokenFilter {
private static final String TOKEN_TYPE = "DoubleMetaphone";
@@ -42,7 +42,7 @@ public final class DoubleMetaphoneFilter
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
for(;;) {
if (!remainingTokens.isEmpty()) {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java Wed Dec 28 16:48:15 2011
@@ -29,7 +29,7 @@ import java.io.IOException;
* Create tokens for phonetic matches. See:
* http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
*/
-public final class PhoneticFilter extends TokenFilter
+public class PhoneticFilter extends TokenFilter
{
protected boolean inject = true;
protected Encoder encoder = null;
@@ -45,7 +45,7 @@ public final class PhoneticFilter extend
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
if( save != null ) {
// clearAttributes(); // not currently necessary
restoreState(save);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java Wed Dec 28 16:48:15 2011
@@ -20,7 +20,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Wed Dec 28 16:48:15 2011
@@ -27,11 +27,12 @@ import org.apache.commons.codec.language
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
/** Tests {@link BeiderMorseFilter} */
public class TestBeiderMorseFilter extends BaseTokenStreamTestCase {
- private Analyzer analyzer = new Analyzer() {
+ private Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -61,7 +62,7 @@ public class TestBeiderMorseFilter exten
final LanguageSet languages = LanguageSet.from(new HashSet<String>() {{
add("italian"); add("greek"); add("spanish");
}});
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1225227&r1=1225211&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Wed Dec 28 16:48:15 2011
@@ -27,7 +27,7 @@ import org.apache.commons.codec.language
import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Tests {@link PhoneticFilter}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/contrib-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/contrib-build.xml?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/contrib-build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/contrib-build.xml Wed Dec 28 16:48:15 2011
@@ -129,6 +129,17 @@
<property name="analyzers-common.uptodate" value="true"/>
</target>
+ <property name="analyzers-phonetic.jar" value="${common.dir}/build/contrib/analyzers/phonetic/lucene-phonetic-${version}.jar"/>
+ <target name="check-analyzers-phonetic-uptodate" unless="analyzers-phonetic.uptodate">
+ <contrib-uptodate name="analyzers/phonetic" jarfile="${analyzers-phonetic.jar}" property="analyzers-phonetic.uptodate"/>
+ </target>
+ <target name="jar-analyzers-phonetic" unless="analyzers-phonetic.uptodate" depends="check-analyzers-phonetic-uptodate">
+ <ant dir="${common.dir}/contrib/analyzers/phonetic" target="jar-core" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="analyzers-phonetic.uptodate" value="true"/>
+ </target>
+
<property name="facet.jar" value="${common.dir}/build/contrib/facet/lucene-facet-${version}.jar"/>
<target name="check-facet-uptodate" unless="facet.uptodate">
<contrib-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
Modified: lucene/dev/branches/branch_3x/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/common-build.xml?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/common-build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/common-build.xml Wed Dec 28 16:48:15 2011
@@ -78,6 +78,7 @@
<path id="solr.base.classpath">
<pathelement path="${analyzers-common.jar}"/>
+ <pathelement path="${analyzers-phonetic.jar}"/>
<pathelement path="${highlighter.jar}"/>
<pathelement path="${memory.jar}"/>
<pathelement path="${misc.jar}"/>
@@ -164,7 +165,7 @@
</target>
<target name="prep-lucene-jars"
- depends="jar-lucene-core, jar-analyzers-common, jar-spellchecker, jar-highlighter,
+ depends="jar-lucene-core, jar-analyzers-common, jar-analyzers-phonetic, jar-spellchecker, jar-highlighter,
jar-memory, jar-misc, jar-spatial, jar-grouping, jar-queries"/>
<target name="lucene-jars-to-solr" depends="prep-lucene-jars">
@@ -172,6 +173,7 @@
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<fileset file="${lucene-core.jar}" />
<fileset file="${analyzers-common.jar}" />
+ <fileset file="${analyzers-phonetic.jar}" />
<fileset file="${spellchecker.jar}" />
<fileset file="${grouping.jar}" />
<fileset file="${queries.jar}" />
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java Wed Dec 28 16:48:15 2011
@@ -16,93 +16,13 @@
*/
package org.apache.solr.analysis;
-import java.io.IOException;
-import java.util.LinkedList;
-
-import org.apache.commons.codec.language.DoubleMetaphone;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-public final class DoubleMetaphoneFilter extends TokenFilter {
-
- private static final String TOKEN_TYPE = "DoubleMetaphone";
-
- private final LinkedList<State> remainingTokens = new LinkedList<State>();
- private final DoubleMetaphone encoder = new DoubleMetaphone();
- private final boolean inject;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
-
- protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
- super(input);
- this.encoder.setMaxCodeLen(maxCodeLength);
- this.inject = inject;
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- for(;;) {
-
- if (!remainingTokens.isEmpty()) {
- // clearAttributes(); // not currently necessary
- restoreState(remainingTokens.removeFirst());
- return true;
- }
-
- if (!input.incrementToken()) return false;
- int len = termAtt.length();
- if (len==0) return true; // pass through zero length terms
-
- int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
-
- String v = termAtt.toString();
- String primaryPhoneticValue = encoder.doubleMetaphone(v);
- String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
-
- // a flag to lazily save state if needed... this avoids a save/restore when only
- // one token will be generated.
- boolean saveState=inject;
-
- if (primaryPhoneticValue!=null && primaryPhoneticValue.length() > 0 && !primaryPhoneticValue.equals(v)) {
- if (saveState) {
- remainingTokens.addLast(captureState());
- }
- posAtt.setPositionIncrement( firstAlternativeIncrement );
- firstAlternativeIncrement = 0;
- termAtt.setEmpty().append(primaryPhoneticValue);
- saveState = true;
- }
-
- if (alternatePhoneticValue!=null && alternatePhoneticValue.length() > 0
- && !alternatePhoneticValue.equals(primaryPhoneticValue)
- && !primaryPhoneticValue.equals(v)) {
- if (saveState) {
- remainingTokens.addLast(captureState());
- saveState = false;
- }
- posAtt.setPositionIncrement( firstAlternativeIncrement );
- termAtt.setEmpty().append(alternatePhoneticValue);
- saveState = true;
- }
-
- // Just one token to return, so no need to capture/restore
- // any state, simply return it.
- if (remainingTokens.isEmpty()) {
- return true;
- }
-
- if (saveState) {
- remainingTokens.addLast(captureState());
- }
- }
- }
+/** @deprecated Use {@link org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter} instead */
+@Deprecated
+public final class DoubleMetaphoneFilter extends org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter {
- @Override
- public void reset() throws IOException {
- input.reset();
- remainingTokens.clear();
+ public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
+ super(input, maxCodeLength, inject);
}
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java Wed Dec 28 16:48:15 2011
@@ -18,10 +18,11 @@ package org.apache.solr.analysis;
import java.util.Map;
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
/**
- * Factory for {@link DoubleMetaphoneFilter}.
+ * Factory for {@link org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter}.
* <pre class="prettyprint" >
* <fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
@@ -52,7 +53,7 @@ public class DoubleMetaphoneFilterFactor
}
}
- public DoubleMetaphoneFilter create(TokenStream input) {
- return new DoubleMetaphoneFilter(input, maxCodeLength, inject);
+ public TokenFilter create(TokenStream input) {
+ return new org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter(input, maxCodeLength, inject);
}
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilter.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilter.java Wed Dec 28 16:48:15 2011
@@ -18,81 +18,16 @@
package org.apache.solr.analysis;
import org.apache.commons.codec.Encoder;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-import java.io.IOException;
/**
- * Create tokens for phonetic matches. See:
- * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
- *
- * @version $Id$
+ * @deprecated Use {@link org.apache.lucene.analysis.phonetic.PhoneticFilter} instead.
*/
-public final class PhoneticFilter extends TokenFilter
+@Deprecated
+public final class PhoneticFilter extends org.apache.lucene.analysis.phonetic.PhoneticFilter
{
- protected boolean inject = true;
- protected Encoder encoder = null;
- protected String name = null;
-
- protected State save = null;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
- super(in);
- this.encoder = encoder;
- this.name = name;
- this.inject = inject;
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if( save != null ) {
- // clearAttributes(); // not currently necessary
- restoreState(save);
- save = null;
- return true;
- }
-
- if (!input.incrementToken()) return false;
-
- // pass through zero-length terms
- if (termAtt.length() == 0) return true;
-
- String value = termAtt.toString();
- String phonetic = null;
- try {
- String v = encoder.encode(value).toString();
- if (v.length() > 0 && !value.equals(v)) phonetic = v;
- } catch (Exception ignored) {} // just use the direct text
-
- if (phonetic == null) return true;
-
- if (!inject) {
- // just modify this token
- termAtt.setEmpty().append(phonetic);
- return true;
- }
-
- // We need to return both the original and the phonetic tokens.
- // to avoid a orig=captureState() change_to_phonetic() saved=captureState() restoreState(orig)
- // we return the phonetic alternative first
-
- int origOffset = posAtt.getPositionIncrement();
- posAtt.setPositionIncrement(0);
- save = captureState();
-
- posAtt.setPositionIncrement(origOffset);
- termAtt.setEmpty().append(phonetic);
- return true;
- }
-
- @Override
- public void reset() throws IOException {
- input.reset();
- save = null;
+ super(in, encoder, inject);
}
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java Wed Dec 28 16:48:15 2011
@@ -31,11 +31,12 @@ import org.apache.commons.codec.language
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.common.SolrException;
/**
- * Factory for {@link PhoneticFilter}.
+ * Factory for {@link org.apache.lucene.analysis.phonetic.PhoneticFilter}.
*
* Create tokens based on phonetic encoders
*
@@ -55,7 +56,7 @@ import org.apache.solr.common.SolrExcept
* </fieldType></pre>
*
* @version $Id$
- * @see PhoneticFilter
+ * @see org.apache.lucene.analysis.phonetic.PhoneticFilter
*/
public class PhoneticFilterFactory extends BaseTokenFilterFactory
{
@@ -138,7 +139,7 @@ public class PhoneticFilterFactory exten
return clazz;
}
- public PhoneticFilter create(TokenStream input) {
- return new PhoneticFilter(input,encoder,name,inject);
+ public TokenFilter create(TokenStream input) {
+ return new org.apache.lucene.analysis.phonetic.PhoneticFilter(input,encoder,inject);
}
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java Wed Dec 28 16:48:15 2011
@@ -32,7 +32,7 @@ public class DoubleMetaphoneFilterFactor
TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
TokenStream filteredStream = factory.create(inputStream);
- assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
+ assertEquals(org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter.class, filteredStream.getClass());
assertTokenStreamContents(filteredStream, new String[] { "international", "ANTR" });
}
@@ -46,7 +46,7 @@ public class DoubleMetaphoneFilterFactor
TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
TokenStream filteredStream = factory.create(inputStream);
- assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
+ assertEquals(org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter.class, filteredStream.getClass());
assertTokenStreamContents(filteredStream, new String[] { "ANTRNXNL" });
}
@@ -60,7 +60,7 @@ public class DoubleMetaphoneFilterFactor
TokenStream filteredStream = factory.create(inputStream);
CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
- assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
+ assertEquals(org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter.class, filteredStream.getClass());
filteredStream.reset();
assertTrue(filteredStream.incrementToken());
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java Wed Dec 28 16:48:15 2011
@@ -21,6 +21,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
+/** @Deprecated: testing backwards compat */
+@Deprecated
public class DoubleMetaphoneFilterTest extends BaseTokenTestCase {
public void testSize4FalseInject() throws Exception {
Modified: lucene/dev/branches/branch_3x/solr/lib/commons-codec-NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/lib/commons-codec-NOTICE.txt?rev=1225227&r1=1225226&r2=1225227&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/lib/commons-codec-NOTICE.txt (original)
+++ lucene/dev/branches/branch_3x/solr/lib/commons-codec-NOTICE.txt Wed Dec 28 16:48:15 2011
@@ -1,5 +1,5 @@
Apache Commons Codec
-Copyright 2002-2009 The Apache Software Foundation
+Copyright 2002-2011 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).