You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2010/07/22 21:34:52 UTC
svn commit: r966819 [15/20] - in /lucene/dev/branches/realtime_search: ./
lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/benchmark/conf/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucene/contrib/benchmark/src/...
Modified: lucene/dev/branches/realtime_search/modules/analysis/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/NOTICE.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/NOTICE.txt (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/NOTICE.txt Thu Jul 22 19:34:35 2010
@@ -4,6 +4,10 @@ Copyright 2006 The Apache Software Found
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+ - Apache Commons
+
The snowball stemmers in
common/src/java/net/sf/snowball
were developed by Martin Porter and Richard Boulton.
@@ -13,30 +17,29 @@ were developed by Martin Porter and Rich
The full snowball package is available from
http://snowball.tartarus.org/
-The Arabic stemmer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
-common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Persian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
-common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Romanian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
-common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Bulgarian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
-common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt.
+The Arabic, Persian, Romanian, Bulgarian, and Hindi analyzers (common) come with a default
+stopword list that is BSD-licensed created by Jacques Savoy. These files reside in:
+common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
See http://members.unine.ch/jacques.savoy/clef/index.html.
-The Hindi analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
-common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
+The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian, and Swedish light stemmers
+(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
+Ljiljana Dolamic. These files reside in:
+common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
The Stempel analyzer (stempel) includes BSD-licensed software developed
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
Modified: lucene/dev/branches/realtime_search/modules/analysis/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/README.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/README.txt (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/README.txt Thu Jul 22 19:34:35 2010
@@ -20,7 +20,12 @@ lucene-analyzers-common-XX.jar
lucene-analyzers-icu-XX.jar
An add-on analysis library that provides improved Unicode support via
International Components for Unicode (ICU). Note: this module depends on
- the ICU4j jar file (version > 4.4.0)
+ the ICU4j jar file (version >= 4.4.0)
+
+lucene-analyzers-phonetic-XX.jar
+ An add-on analysis library that provides phonetic encoders via Apache
+ Commons-Codec. Note: this module depends on the commons-codec jar
+ file (version >= 1.4)
lucene-analyzers-smartcn-XX.jar
An add-on analysis library that provides word segmentation for Simplified
@@ -32,12 +37,14 @@ lucene-analyzers-stempel-XX.jar
common/src/java
icu/src/java
+phonetic/src/java
smartcn/src/java
stempel/src/java
- The source code for the four libraries.
+ The source code for the five libraries.
common/src/test
icu/src/test
+phonetic/src/test
smartcn/src/test
stempel/src/test
- Unit tests for the four libraries.
+ Unit tests for the five libraries.
Modified: lucene/dev/branches/realtime_search/modules/analysis/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/build.xml (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/build.xml Thu Jul 22 19:34:35 2010
@@ -35,6 +35,10 @@
<ant dir="icu" />
</target>
+ <target name="phonetic">
+ <ant dir="phonetic" />
+ </target>
+
<target name="smartcn">
<ant dir="smartcn" />
</target>
@@ -44,29 +48,33 @@
</target>
<target name="default" depends="compile"/>
- <target name="compile" depends="common,icu,smartcn,stempel" />
+ <target name="compile" depends="common,icu,phonetic,smartcn,stempel" />
<target name="clean">
<ant dir="common" target="clean" />
<ant dir="icu" target="clean" />
+ <ant dir="phonetic" target="clean" />
<ant dir="smartcn" target="clean" />
<ant dir="stempel" target="clean" />
</target>
<target name="compile-core">
<ant dir="common" target="compile-core" />
<ant dir="icu" target="compile-core" />
+ <ant dir="phonetic" target="compile-core" />
<ant dir="smartcn" target="compile-core" />
<ant dir="stempel" target="compile-core" />
</target>
<target name="compile-test">
<ant dir="common" target="compile-test" />
<ant dir="icu" target="compile-test" />
+ <ant dir="phonetic" target="compile-test" />
<ant dir="smartcn" target="compile-test" />
<ant dir="stempel" target="compile-test" />
</target>
<target name="test">
<ant dir="common" target="test" />
<ant dir="icu" target="test" />
+ <ant dir="phonetic" target="test" />
<ant dir="smartcn" target="test" />
<ant dir="stempel" target="test" />
</target>
@@ -76,6 +84,7 @@
<target name="dist-maven" depends="default">
<ant dir="common" target="dist-maven" />
<ant dir="icu" target="dist-maven" />
+ <ant dir="phonetic" target="dist-maven" />
<ant dir="smartcn" target="dist-maven" />
<ant dir="stempel" target="dist-maven" />
</target>
@@ -83,6 +92,7 @@
<target name="javadocs">
<ant dir="common" target="javadocs" />
<ant dir="icu" target="javadocs" />
+ <ant dir="phonetic" target="javadocs" />
<ant dir="smartcn" target="javadocs" />
<ant dir="stempel" target="javadocs" />
</target>
@@ -90,6 +100,7 @@
<target name="javadocs-index.html">
<ant dir="common" target="javadocs-index.html" />
<ant dir="icu" target="javadocs-index.html" />
+ <ant dir="phonetic" target="javadocs-index.html" />
<ant dir="smartcn" target="javadocs-index.html" />
<ant dir="stempel" target="javadocs-index.html" />
</target>
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Normalizer for Arabic.
* <p>
@@ -96,20 +98,4 @@ public class ArabicNormalizer {
return len;
}
-
- /**
- * Delete a character in-place
- *
- * @param s Input Buffer
- * @param pos Position of character to delete
- * @param len length of input buffer
- * @return length of input buffer after deletion
- */
- protected int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
-
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java Thu Jul 22 19:34:35 2010
@@ -1,4 +1,6 @@
package org.apache.lucene.analysis.ar;
+
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +18,8 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Stemmer for Arabic.
* <p>
@@ -86,7 +90,7 @@ public class ArabicStemmer {
*/
public int stemPrefix(char s[], int len) {
for (int i = 0; i < prefixes.length; i++)
- if (startsWith(s, len, prefixes[i]))
+ if (startsWithCheckLength(s, len, prefixes[i]))
return deleteN(s, 0, len, prefixes[i].length);
return len;
}
@@ -99,7 +103,7 @@ public class ArabicStemmer {
*/
public int stemSuffix(char s[], int len) {
for (int i = 0; i < suffixes.length; i++)
- if (endsWith(s, len, suffixes[i]))
+ if (endsWithCheckLength(s, len, suffixes[i]))
len = deleteN(s, len - suffixes[i].length, len, suffixes[i].length);
return len;
}
@@ -111,7 +115,7 @@ public class ArabicStemmer {
* @param prefix prefix to check
* @return true if the prefix matches and can be stemmed
*/
- boolean startsWith(char s[], int len, char prefix[]) {
+ boolean startsWithCheckLength(char s[], int len, char prefix[]) {
if (prefix.length == 1 && len < 4) { // wa- prefix requires at least 3 characters
return false;
} else if (len < prefix.length + 2) { // other prefixes require only 2.
@@ -132,7 +136,7 @@ public class ArabicStemmer {
* @param suffix suffix to check
* @return true if the suffix matches and can be stemmed
*/
- boolean endsWith(char s[], int len, char suffix[]) {
+ boolean endsWithCheckLength(char s[], int len, char suffix[]) {
if (len < suffix.length + 2) { // all suffixes require at least 2 characters after stemming
return false;
} else {
@@ -142,37 +146,5 @@ public class ArabicStemmer {
return true;
}
- }
-
-
- /**
- * Delete n characters in-place
- *
- * @param s Input Buffer
- * @param pos Position of character to delete
- * @param len Length of input buffer
- * @param nChars number of characters to delete
- * @return length of input buffer after deletion
- */
- protected int deleteN(char s[], int pos, int len, int nChars) {
- for (int i = 0; i < nChars; i++)
- len = delete(s, pos, len);
- return len;
- }
-
- /**
- * Delete a character in-place
- *
- * @param s Input Buffer
- * @param pos Position of character to delete
- * @param len length of input buffer
- * @return length of input buffer after deletion
- */
- protected int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
-
+ }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.bg;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Light Stemmer for Bulgarian.
* <p>
@@ -138,15 +140,4 @@ public class BulgarianStemmer {
return len;
}
-
- private boolean endsWith(final char s[], final int len, final String suffix) {
- final int suffixLen = suffix.length();
- if (suffixLen > len)
- return false;
- for (int i = suffixLen - 1; i >= 0; i--)
- if (s[len -(suffixLen - i)] != suffix.charAt(i))
- return false;
-
- return true;
- }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cz;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Light Stemmer for Czech.
* <p>
@@ -166,16 +168,4 @@ public class CzechStemmer {
return len;
}
-
- private boolean endsWith(char s[], int len, String suffix) {
- int suffixLen = suffix.length();
- if (suffixLen > len)
- return false;
-
- for (int i = suffixLen - 1; i >= 0; i--)
- if (s[len - (suffixLen - i)] != suffix.charAt(i))
- return false;
-
- return true;
- }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fa;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Normalizer for Persian.
* <p>
@@ -82,20 +84,4 @@ public class PersianNormalizer {
return len;
}
-
- /**
- * Delete a character in-place
- *
- * @param s Input Buffer
- * @param pos Position of character to delete
- * @param len length of input buffer
- * @return length of input buffer after deletion
- */
- protected int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
-
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Normalizer for Hindi.
* <p>
@@ -176,19 +178,4 @@ public class HindiNormalizer {
return len;
}
-
- /**
- * Delete a character in-place
- *
- * @param s Input Buffer
- * @param pos Position of character to delete
- * @param len length of input buffer
- * @return length of input buffer after deletion
- */
- protected int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Light Stemmer for Hindi.
* <p>
@@ -116,15 +118,4 @@ public class HindiStemmer {
return len - 1;
return len;
}
-
- private boolean endsWith(final char s[], final int len, final String suffix) {
- final int suffixLen = suffix.length();
- if (suffixLen > len)
- return false;
- for (int i = suffixLen - 1; i >= 0; i--)
- if (s[len -(suffixLen - i)] != suffix.charAt(i))
- return false;
-
- return true;
- }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.id;
* limitations under the License.
*/
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
/**
* Stemmer for Indonesian.
* <p>
@@ -266,39 +268,5 @@ public class IndonesianStemmer {
return length - 1;
}
return length;
- }
-
- private boolean startsWith(char s[], int len, String prefix) {
- final int prefixLen = prefix.length();
- if (prefixLen > len)
- return false;
- for (int i = 0; i < prefixLen; i++)
- if (s[i] != prefix.charAt(i))
- return false;
- return true;
- }
-
- private boolean endsWith(char s[], int len, String suffix) {
- final int suffixLen = suffix.length();
- if (suffixLen > len)
- return false;
- for (int i = suffixLen - 1; i >= 0; i--)
- if (s[len -(suffixLen - i)] != suffix.charAt(i))
- return false;
-
- return true;
- }
-
- private int deleteN(char s[], int pos, int len, int nChars) {
- for (int i = 0; i < nChars; i++)
- len = delete(s, pos, len);
- return len;
- }
-
- private int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
+ }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java Thu Jul 22 19:34:35 2010
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.in;
import java.util.BitSet;
import java.util.IdentityHashMap;
import static java.lang.Character.UnicodeBlock.*;
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
/**
* Normalizes the Unicode representation of text in Indian languages.
@@ -290,14 +291,4 @@ public class IndicNormalizer {
return len;
}
-
- /**
- * Delete a character in-place
- */
- private int delete(char s[], int pos, int len) {
- if (pos < len)
- System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
- return len - 1;
- }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Thu Jul 22 19:34:35 2010
@@ -18,12 +18,14 @@ package org.apache.lucene.analysis.query
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.io.Reader;
@@ -141,20 +143,15 @@ public final class QueryAutoStopWordAnal
*/
public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException {
HashSet<String> stopWords = new HashSet<String>();
- String internedFieldName = StringHelper.intern(fieldName);
- TermEnum te = reader.terms(new Term(fieldName));
- Term term = te.term();
- while (term != null) {
- if (term.field() != internedFieldName) {
- break;
+ Terms terms = MultiFields.getTerms(reader, fieldName);
+ if (terms != null) {
+ TermsEnum te = terms.iterator();
+ BytesRef text;
+ while ((text = te.next()) != null) {
+ if (te.docFreq() > maxDocFreq) {
+ stopWords.add(text.utf8ToString());
+ }
}
- if (te.docFreq() > maxDocFreq) {
- stopWords.add(term.text());
- }
- if (!te.next()) {
- break;
- }
- term = te.term();
}
stopWordsPerField.put(fieldName, stopWords);
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Thu Jul 22 19:34:35 2010
@@ -42,7 +42,7 @@ import java.util.Set;
* <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
* </ul>
* </p>
- * @deprecated Use the language-specific analyzer in contrib/analyzers instead.
+ * @deprecated Use the language-specific analyzer in modules/analysis instead.
* This analyzer will be removed in Lucene 4.0
*/
@Deprecated
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Thu Jul 22 19:34:35 2010
@@ -21,17 +21,17 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
@@ -82,10 +82,16 @@ public class TestKeywordAnalyzer extends
writer.close();
IndexReader reader = IndexReader.open(dir, true);
- TermDocs td = reader.termDocs(new Term("partnum", "Q36"));
- assertTrue(td.next());
- td = reader.termDocs(new Term("partnum", "Q37"));
- assertTrue(td.next());
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "partnum",
+ new BytesRef("Q36"));
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "partnum",
+ new BytesRef("Q37"));
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
}
// LUCENE-1441
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Thu Jul 22 19:34:35 2010
@@ -12,10 +12,13 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
/**
@@ -279,8 +282,11 @@ public class TestStandardAnalyzer extend
// Make sure position is still incremented when
// massive term is skipped:
- TermPositions tps = reader.termPositions(new Term("content", "another"));
- assertTrue(tps.next());
+ DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "content",
+ new BytesRef("another"));
+ assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Thu Jul 22 19:34:35 2010
@@ -17,17 +17,17 @@ package org.apache.lucene.analysis.de;
* limitations under the License.
*/
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-import java.io.StringReader;
+import java.io.InputStream;
+import java.io.Reader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
/**
* Test the German stemmer. The stemming algorithm is known to work less
@@ -38,25 +38,18 @@ import org.apache.lucene.analysis.core.L
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
public void testStemming() throws Exception {
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
- TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
- // read test cases from external file:
- InputStreamReader isr = new InputStreamReader(getClass().getResourceAsStream("data.txt"), "iso-8859-1");
- BufferedReader breader = new BufferedReader(isr);
- while(true) {
- String line = breader.readLine();
- if (line == null)
- break;
- line = line.trim();
- if (line.startsWith("#") || line.equals(""))
- continue; // ignore comments and empty lines
- String[] parts = line.split(";");
- //System.out.println(parts[0] + " -- " + parts[1]);
- tokenizer.reset(new StringReader(parts[0]));
- filter.reset();
- assertTokenStreamContents(filter, new String[] { parts[1] });
- }
- breader.close();
- isr.close();
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t,
+ new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+ }
+ };
+
+ InputStream vocOut = getClass().getResourceAsStream("data.txt");
+ assertVocabulary(analyzer, vocOut);
+ vocOut.close();
}
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt Thu Jul 22 19:34:35 2010
@@ -1,48 +1,48 @@
# German special characters are replaced:
-häufig;haufig
+häufig haufig
# here the stemmer works okay, it maps related words to the same stem:
-abschließen;abschliess
-abschließender;abschliess
-abschließendes;abschliess
-abschließenden;abschliess
-
-Tisch;tisch
-Tische;tisch
-Tischen;tisch
-
-Haus;hau
-Hauses;hau
-Häuser;hau
-Häusern;hau
+abschließen abschliess
+abschließender abschliess
+abschließendes abschliess
+abschließenden abschliess
+
+Tisch tisch
+Tische tisch
+Tischen tisch
+
+Haus hau
+Hauses hau
+Häuser hau
+Häusern hau
# here's a case where overstemming occurs, i.e. a word is
# mapped to the same stem as unrelated words:
-hauen;hau
+hauen hau
# here's a case where understemming occurs, i.e. two related words
# are not mapped to the same stem. This is the case with basically
# all irregular forms:
-Drama;drama
-Dramen;dram
+Drama drama
+Dramen dram
-# replace "ß" with 'ss':
-Ausmaß;ausmass
+# replace "ß" with 'ss':
+Ausmaß ausmass
# fake words to test if suffixes are cut off:
-xxxxxe;xxxxx
-xxxxxs;xxxxx
-xxxxxn;xxxxx
-xxxxxt;xxxxx
-xxxxxem;xxxxx
-xxxxxer;xxxxx
-xxxxxnd;xxxxx
+xxxxxe xxxxx
+xxxxxs xxxxx
+xxxxxn xxxxx
+xxxxxt xxxxx
+xxxxxem xxxxx
+xxxxxer xxxxx
+xxxxxnd xxxxx
# the suffixes are also removed when combined:
-xxxxxetende;xxxxx
+xxxxxetende xxxxx
# words that are shorter than four characters are not changed:
-xxe;xxe
+xxe xxe
# -em and -er are not removed from words shorter than five characters:
-xxem;xxem
-xxer;xxer
+xxem xxem
+xxer xxer
# -nd is not removed from words shorter than six characters:
-xxxnd;xxxnd
+xxxnd xxxnd
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java Thu Jul 22 19:34:35 2010
@@ -17,21 +17,22 @@ package org.apache.lucene.analysis.en;
* limitations under the License.
*/
-import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.Reader;
import java.io.StringReader;
-import java.util.zip.ZipFile;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
@@ -41,26 +42,16 @@ public class TestPorterStemFilter extend
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
- TokenStream filter = new PorterStemFilter(tokenizer);
- ZipFile zipFile = new ZipFile(getDataFile("porterTestData.zip"));
- InputStream voc = zipFile.getInputStream(zipFile.getEntry("voc.txt"));
- InputStream out = zipFile.getInputStream(zipFile.getEntry("output.txt"));
- BufferedReader vocReader = new BufferedReader(new InputStreamReader(
- voc, "UTF-8"));
- BufferedReader outputReader = new BufferedReader(new InputStreamReader(
- out, "UTF-8"));
- String inputWord = null;
- while ((inputWord = vocReader.readLine()) != null) {
- String expectedWord = outputReader.readLine();
- assertNotNull(expectedWord);
- tokenizer.reset(new StringReader(inputWord));
- filter.reset();
- assertTokenStreamContents(filter, new String[] { expectedWord });
- }
- vocReader.close();
- outputReader.close();
- zipFile.close();
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new PorterStemFilter(t));
+ }
+ };
+
+ assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
public void testWithKeywordAttribute() throws IOException {
Propchange: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,3 +1,4 @@
+/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java:953476-966816
/lucene/java/branches/flex_1458/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:824912-931101
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:829134,829881,831036,896850,909334
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Thu Jul 22 19:34:35 2010
@@ -17,71 +17,35 @@ package org.apache.lucene.analysis.ru;
* limitations under the License.
*/
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.LuceneTestCase;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStreamReader;
-import java.io.FileInputStream;
-import java.util.ArrayList;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
/**
* @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
*/
@Deprecated
-public class TestRussianStem extends LuceneTestCase
-{
- private ArrayList<String> words = new ArrayList<String>();
- private ArrayList<String> stems = new ArrayList<String>();
-
- public TestRussianStem(String name)
- {
- super(name);
- }
-
- /**
- * @see TestCase#setUp()
- */
- @Override
- protected void setUp() throws Exception {
- super.setUp();
- //System.out.println(new java.util.Date());
- String str;
-
- // open and read words into an array list
- BufferedReader inWords =
- new BufferedReader(
- new InputStreamReader(
- getClass().getResourceAsStream("wordsUTF8.txt"),
- "UTF-8"));
- while ((str = inWords.readLine()) != null)
- {
- words.add(str);
- }
- inWords.close();
-
- // open and read stems into an array list
- BufferedReader inStems =
- new BufferedReader(
- new InputStreamReader(
- getClass().getResourceAsStream("stemsUTF8.txt"),
- "UTF-8"));
- while ((str = inStems.readLine()) != null)
- {
- stems.add(str);
- }
- inStems.close();
- }
-
- public void testStem()
- {
- for (int i = 0; i < words.size(); i++)
- {
- //if ( (i % 100) == 0 ) System.err.println(i);
- String realStem =
- RussianStemmer.stemWord(
- words.get(i));
- assertEquals("unicode", stems.get(i), realStem);
- }
- }
-
+public class TestRussianStem extends LuceneTestCase {
+ public void testStem() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new RussianStemFilter(t));
+ }
+ };
+ InputStream voc = getClass().getResourceAsStream("wordsUTF8.txt");
+ InputStream out = getClass().getResourceAsStream("stemsUTF8.txt");
+ assertVocabulary(a, voc, out);
+ voc.close();
+ out.close();
+ }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java Thu Jul 22 19:34:35 2010
@@ -17,38 +17,21 @@ package org.apache.lucene.analysis.snowb
* limitations under the License.
*/
-import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.StringReader;
-import java.util.zip.ZipFile;
+import java.io.Reader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
/**
* Test the snowball filters against the snowball data tests
*/
-public class TestSnowballVocab extends BaseTokenStreamTestCase {
- private Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
- ZipFile zipFile = null;
-
- @Override
- protected void setUp() throws Exception {
- super.setUp();
- this.zipFile = new ZipFile(getDataFile("TestSnowballVocabData.zip"));
- }
-
- @Override
- protected void tearDown() throws Exception {
- this.zipFile.close();
- this.zipFile = null;
- super.tearDown();
- }
-
+public class TestSnowballVocab extends LuceneTestCase {
/**
* Run all languages against their snowball vocabulary tests.
*/
@@ -82,25 +65,20 @@ public class TestSnowballVocab extends B
* For the supplied language, run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
- private void assertCorrectOutput(String snowballLanguage, String dataDirectory)
+ private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
throws IOException {
if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
- TokenStream filter = new SnowballFilter(tokenizer, snowballLanguage);
- InputStream voc = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/voc.txt"));
- InputStream out = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/output.txt"));
- BufferedReader vocReader = new BufferedReader(new InputStreamReader(
- voc, "UTF-8"));
- BufferedReader outputReader = new BufferedReader(new InputStreamReader(
- out, "UTF-8"));
- String inputWord = null;
- while ((inputWord = vocReader.readLine()) != null) {
- String expectedWord = outputReader.readLine();
- assertNotNull(expectedWord);
- tokenizer.reset(new StringReader(inputWord));
- filter.reset();
- assertTokenStreamContents(filter, new String[] {expectedWord});
- }
- vocReader.close();
- outputReader.close();
+
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+ }
+ };
+
+ assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"),
+ dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
}
}
Propchange: lucene/dev/branches/realtime_search/solr/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/solr:949730
+/lucene/dev/branches/branch_3x/solr:949730,957490*,961612
+/lucene/dev/trunk/solr:953476-966816
/lucene/java/branches/lucene_2_4/solr:748824
/lucene/java/branches/lucene_2_9/solr:817269-818600,825998,829134,829881,831036,896850,909334
/lucene/java/branches/lucene_2_9_back_compat_tests/solr:818601-821336
Modified: lucene/dev/branches/realtime_search/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/CHANGES.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/solr/CHANGES.txt Thu Jul 22 19:34:35 2010
@@ -27,6 +27,7 @@ Versions of Major Components
Apache Lucene trunk
Apache Tika 0.6
Carrot2 3.1.0
+Velocity 1.6.1
Upgrading from Solr 1.4
@@ -121,13 +122,13 @@ New Features
* SOLR-1131: FieldTypes can now output multiple Fields per Type and still be searched. This can be handy for hiding the details of a particular
implementation such as in the spatial case. (Chris Mattmann, shalin, noble, gsingers, yonik)
-* SOLR-1586: Add support for Geohash and Spatial Tile FieldType (Chris Mattmann, gsingers)
+* SOLR-1586: Add support for Geohash FieldType (Chris Mattmann, gsingers)
* SOLR-1697: PluginInfo should load plugins w/o class attribute also (noble)
* SOLR-1268: Incorporate FastVectorHighlighter (koji)
-* SOLR-1750: SystemInfoRequestHandler added for simpler programmatic access
+* SOLR-1750: SolrInfoMBeanHandler added for simpler programmatic access
to info currently available from registry.jsp and stats.jsp
(ehatcher, hossman)
@@ -176,6 +177,31 @@ New Features
* SOLR-1915: DebugComponent now supports using a NamedList to model
Explanation objects in it's responses instead of
Explanation.toString (hossman)
+
+* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
+ norm, maxdoc, numdocs. (yonik)
+
+* SOLR-1957: The VelocityResponseWriter contrib moved to core.
+ Example search UI now available at http://localhost:8983/solr/browse
+ (ehatcher)
+
+* SOLR-1974: Add LimitTokenCountFilterFactory. (koji)
+
+* SOLR-1966: QueryElevationComponent can now return just the included results in the elevation file (gsingers, yonik)
+
+* SOLR-1556: TermVectorComponent now supports per field overrides. Also, it now throws an error
+ if passed in fields do not exist and warnings
+ if fields that do not have term vector options (termVectors, offsets, positions)
+ that align with the schema declaration. It also
+ will now return warnings about (gsingers)
+
+* SOLR-1985: FastVectorHighlighter: add wrapper class for Lucene's SingleFragListBuilder (koji)
+
+* SOLR-1984: Add HyphenationCompoundWordTokenFilterFactory. (PB via rmuir)
+
+* SOLR-1568: Added "native" filtering support for PointType, GeohashField. Added LatLonType with filtering support too. See
+ http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial.
+ Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers)
Optimizations
----------------------
@@ -200,6 +226,11 @@ Optimizations
JMX. The default root name is "solr" followed by the core name.
(Constantijn Visinescu, hossman)
+* SOLR-1968: speed up initial filter cache population for facet.method=enum and
+ also big terms for multi-valued facet.method=fc. The resulting speedup
+ for the first facet request is anywhere from 30% to 32x, depending on how many
+ terms are in the field and how many documents match per term. (yonik)
+
Bug Fixes
----------------------
@@ -275,8 +306,10 @@ Bug Fixes
(Robert Muir via shalin)
* SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that
- could halt the streaming of documents. (Attila Babo via yonik)
-
+ could halt the streaming of documents. The original patch to fix this
+ (never officially released) introduced another hanging bug due to
+ connections not being released. (Attila Babo, Erik Hetzner via yonik)
+
* SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers
retrieved from ContentStreams are not closed in various places, resulting
in file descriptor leaks.
@@ -328,6 +361,25 @@ Bug Fixes
* SOLR-1914: Change the JSON response format to output float/double
values of NaN,Infinity,-Infinity as strings. (yonik)
+* SOLR-1948: PatternTokenizerFactory should use parent's args (koji)
+
+* SOLR-1870: Indexing documents using the 'javabin' format no longer
+ fails with a ClassCastException whenSolrInputDocuments contain field
+ values which are Collections or other classes that implement
+ Iterable. (noble, hossman)
+
+* SOLR-1981: Solr will now fail correctly if solr.xml attempts to
+ specify multiple cores that have the same name (hossman)
+
+* SOLR-1791: Fix messed up core names on admin gui (yonik via koji)
+
+* SOLR-1995: Change date format from "hour in am/pm" to "hour in day"
+ in CoreContainer and SnapShooter. (Hayato Ito, koji)
+
+* SOLR-2008: avoid possible RejectedExecutionException w/autoCommit
+ by making SolrCore close the UpdateHandler before closing the
+ SearchExecutor. (NarasimhaRaju, hossman)
+
Other Changes
----------------------
@@ -417,6 +469,12 @@ Other Changes
ExtendedDismaxQParser has been changed to be determined based on the
effective value of the 'q.op' param (hossman)
+* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system
+ (hossman)
+
+* SOLR-2003: SolrResourceLoader will report any encoding errors, rather than
+ silently using replacement characters for invalid inputs (blargy via rmuir)
+
Build
----------------------
Modified: lucene/dev/branches/realtime_search/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/NOTICE.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/NOTICE.txt (original)
+++ lucene/dev/branches/realtime_search/solr/NOTICE.txt Thu Jul 22 19:34:35 2010
@@ -1,6 +1,6 @@
==============================================================
Apache Solr
- Copyright 2006-2008 The Apache Software Foundation
+ Copyright 2006-2010 The Apache Software Foundation
==============================================================
This product includes software developed by
Modified: lucene/dev/branches/realtime_search/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/build.xml (original)
+++ lucene/dev/branches/realtime_search/solr/build.xml Thu Jul 22 19:34:35 2010
@@ -62,7 +62,7 @@
<echo message="Use 'ant dist' to build the project WAR and JAR files." />
<echo message="Use 'ant generate-maven-artifacts' to generate maven artifacts." />
<echo message="Use 'ant package' to generate zip, tgz, and maven artifacts for distribution." />
- <echo message="Use 'ant luke' to start luke. see: http://www.getopt.org/luke/" />
+ <echo message="Use 'ant luke' to start luke. see: http://luke.googlecode.com" />
<echo message="Use 'ant test' to run unit tests." />
</target>
@@ -89,16 +89,16 @@
<!-- no description, don't advertise -->
<!-- Building off Lucene trunk, we don't need this
- <available file="lib/lucene-core-${lucene_version}.jar"
+ <available file="lib/lucene-core-${version}.jar"
property="lucene-version-ok-m2deploy-wont-fail" />
<fail unless="lucene-version-ok-m2deploy-wont-fail">
- lib/lucene-core-${lucene_version}.jar Missing
+ lib/lucene-core-${version}.jar Missing
- lib/lucene-core-${lucene_version}.jar does not exist.
+ lib/lucene-core-${version}.jar does not exist.
This will cause problems with m2-deploy later, so fail fast now.
Probably cause: lucene jars were upgraded w/o modifying the
- 'lucene_version' property in common-build.xml
+ 'version' property in common-build.xml
</fail>
-->
@@ -221,13 +221,13 @@
<packageset dir="${src}/java" />
<packageset dir="${src}/webapp/src" />
<packageset dir="contrib/dataimporthandler/src/main/java" />
- <packageset dir="contrib/clustering/src/main/java" />
+ <!--<packageset dir="contrib/clustering/src/main/java" />-->
<packageset dir="contrib/extraction/src/main/java" />
<group title="Core" packages="org.apache.*" />
<group title="Common" packages="org.apache.solr.common.*" />
<group title="SolrJ" packages="org.apache.solr.client.solrj*" />
<group title="contrib: DataImportHandler" packages="org.apache.solr.handler.dataimport*" />
- <group title="contrib: Clustering" packages="org.apache.solr.handler.clustering*" />
+ <!--<group title="contrib: Clustering" packages="org.apache.solr.handler.clustering*" />-->
<group title="contrib: Solr Cell" packages="org.apache.solr.handler.extraction*" />
</sources>
</invoke-javadoc>
@@ -384,7 +384,7 @@
</or>
</condition>
- <target name="junit" depends="compileTests,dist-contrib,junit-mkdir,junit-sequential,junit-parallel"/>
+ <target name="junit" depends="compileTests,junit-mkdir,junit-sequential,junit-parallel"/>
<target name="junit-sequential" if="runsequential">
<junit-macro/>
@@ -602,10 +602,8 @@
<solr-jar destfile="${dist}/apache-solr-cell-src-${version}.jar"
basedir="contrib/extraction/src" />
- <solr-jar destfile="${dist}/apache-solr-velocity-src-${version}.jar"
- basedir="contrib/velocity/src" />
- <solr-jar destfile="${dist}/apache-solr-clustering-src-${version}.jar"
- basedir="contrib/clustering/src" />
+ <!--<solr-jar destfile="${dist}/apache-solr-clustering-src-${version}.jar"
+ basedir="contrib/clustering/src" />-->
</target>
<target name="dist-javadoc" description="Creates the Solr javadoc distribution files"
@@ -618,10 +616,8 @@
basedir="${build.javadoc}/solrj" />
<solr-jar destfile="${dist}/apache-solr-dataimporthandler-docs-${version}.jar"
basedir="${build.javadoc}/contrib-solr-dataimporthandler" />
- <solr-jar destfile="${dist}/apache-solr-clustering-docs-${version}.jar"
- basedir="${build.javadoc}/contrib-solr-clustering" />
- <solr-jar destfile="${dist}/apache-solr-velocity-docs-${version}.jar"
- basedir="${build.javadoc}/contrib-solr-velocity" />
+ <!--<solr-jar destfile="${dist}/apache-solr-clustering-docs-${version}.jar"
+ basedir="${build.javadoc}/contrib-solr-clustering" />-->
<solr-jar destfile="${dist}/apache-solr-cell-docs-${version}.jar"
basedir="${build.javadoc}/contrib-solr-cell" />
</target>
@@ -663,7 +659,7 @@
<target name="example"
description="Creates a runnable example configuration."
- depends="init-forrest-entities,dist-contrib,dist-war,example-contrib">
+ depends="init-forrest-entities,compile-lucene,dist-contrib,dist-war,example-contrib">
<copy file="${dist}/${fullnamever}.war"
tofile="${example}/webapps/${ant.project.name}.war"/>
<jar destfile="${example}/exampledocs/post.jar"
@@ -682,7 +678,8 @@
<echo>See ${example}/README.txt for how to run the Solr example configuration.</echo>
</target>
- <target name="run-example" depends="example">
+ <target name="run-example" depends="example"
+ description="Run Solr interactively, via Jetty. -Dexample.debug=true to enable JVM debugger">
<property name="example.solr.home" location="example/solr"/>
<property name="example.data.dir" location="example/solr/data"/>
<property name="example.debug.suspend" value="n"/>
@@ -802,7 +799,7 @@
<sign-maven-artifacts artifact.id="solr-core" gpg.passphrase="${gpg.passphrase}"/>
<sign-maven-artifacts artifact.id="solr-dataimporthandler" gpg.passphrase="${gpg.passphrase}"/>
<sign-maven-artifacts artifact.id="solr-dataimporthandler-extras" gpg.passphrase="${gpg.passphrase}"/>
- <sign-maven-artifacts artifact.id="solr-clustering" gpg.passphrase="${gpg.passphrase}"/>
+ <!--<sign-maven-artifacts artifact.id="solr-clustering" gpg.passphrase="${gpg.passphrase}"/>-->
<sign-maven-artifacts artifact.id="solr-cell" gpg.passphrase="${gpg.passphrase}"/>
<sign-maven-dependency-artifacts artifact.id="solr-lucene-analyzers" gpg.passphrase="${gpg.passphrase}"/>
@@ -812,11 +809,10 @@
<sign-maven-dependency-artifacts artifact.id="solr-lucene-snowball" gpg.passphrase="${gpg.passphrase}"/>
<sign-maven-dependency-artifacts artifact.id="solr-lucene-spellchecker" gpg.passphrase="${gpg.passphrase}"/>
<sign-maven-artifacts artifact.id="solr-solrj" gpg.passphrase="${gpg.passphrase}"/>
- <sign-maven-artifacts artifact.id="solr-velocity" gpg.passphrase="${gpg.passphrase}"/>
<!-- These are special since there are no jars, just poms -->
- <sign-artifact input.file="${maven.dist.prefix}/solr-parent/${maven_version}/solr-parent-${maven_version}.pom" gpg.passphrase="${gpg.passphrase}"/>
- <sign-artifact input.file="${maven.dist.prefix}/solr-lucene-contrib/${maven_version}/solr-lucene-contrib-${maven_version}.pom" gpg.passphrase="${gpg.passphrase}"/>
+ <sign-artifact input.file="${maven.dist.prefix}/solr-parent/${version}/solr-parent-${version}.pom" gpg.passphrase="${gpg.passphrase}"/>
+ <sign-artifact input.file="${maven.dist.prefix}/solr-lucene-contrib/${version}/solr-lucene-contrib-${version}.pom" gpg.passphrase="${gpg.passphrase}"/>
</target>
@@ -882,27 +878,18 @@
</artifact-attachments>
</m2-deploy>
- <m2-deploy pom.xml="contrib/clustering/solr-clustering-pom.xml.template"
+ <!--<m2-deploy pom.xml="contrib/clustering/solr-clustering-pom.xml.template"
jar.file="${dist}/apache-solr-clustering-${version}.jar">
<artifact-attachments>
<attach file="${dist}/apache-solr-clustering-src-${version}.jar" classifier="sources"/>
<attach file="${dist}/apache-solr-clustering-docs-${version}.jar" classifier="javadoc"/>
</artifact-attachments>
- </m2-deploy>
+ </m2-deploy>-->
<!-- Clustering specific -->
<!-- TODO: MORE NEEDED HERE ONCE WE FINALIZE THE LIBS FOR CARROT -->
<!-- end clustering specific -->
- <m2-deploy pom.xml="contrib/velocity/solr-velocity-pom.xml.template"
- jar.file="contrib/velocity/src/main/solr/lib/apache-solr-velocity-${version}.jar">
-
- <artifact-attachments>
- <attach file="${dist}/apache-solr-velocity-src-${version}.jar" classifier="sources"/>
- <attach file="${dist}/apache-solr-velocity-docs-${version}.jar" classifier="javadoc"/>
- </artifact-attachments>
- </m2-deploy>
-
<m2-deploy pom.xml="src/maven/solr-core-pom.xml.template"
jar.file="${dist}/apache-solr-core-${version}.jar">
Modified: lucene/dev/branches/realtime_search/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/common-build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/common-build.xml (original)
+++ lucene/dev/branches/realtime_search/solr/common-build.xml Thu Jul 22 19:34:35 2010
@@ -118,14 +118,6 @@
<property name="maven.dist.prefix" value="${maven.dist.dir}/org/apache/solr"/>
- <!--
- The version suffix of the Lucene artifacts checked into "lib"
- IF YOU CHANGE THIS, SANITY CHECK "javadoc.link.lucene"
- -->
- <property name="lucene_version" value="${version}"/>
- <!-- The version number to assign to the Maven artifacts. -->
- <property name="maven_version" value="4.0-SNAPSHOT"/>
-
<!-- By default, "deploy" to a temporary directory (as well as installing
into your local repository). If you wish to deploy to a remote
repository, set this property to the URL of that repository. In
@@ -155,6 +147,7 @@
<path id="lucene.classpath">
<pathelement location="${common-solr.dir}/../lucene/build/classes/java" />
<pathelement location="${common-solr.dir}/../modules/analysis/build/common/classes/java" />
+ <pathelement location="${common-solr.dir}/../modules/analysis/build/phonetic/classes/java" />
<pathelement location="${common-solr.dir}/../lucene/build/contrib/highlighter/classes/java" />
<pathelement location="${common-solr.dir}/../lucene/build/contrib/memory/classes/java" />
<pathelement location="${common-solr.dir}/../lucene/build/contrib/misc/classes/java" />
@@ -170,6 +163,7 @@
</subant>
<subant target="jar" inheritall="false" failonerror="true">
<fileset dir="../modules/analysis/common" includes="build.xml" />
+ <fileset dir="../modules/analysis/phonetic" includes="build.xml" />
<fileset dir="../lucene/contrib/highlighter" includes="build.xml" />
<fileset dir="../lucene/contrib/memory" includes="build.xml" />
<fileset dir="../lucene/contrib/misc" includes="build.xml" />
@@ -184,28 +178,31 @@
<mkdir dir="${lucene-libs}"/>
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<fileset dir="../lucene/build/">
- <include name="lucene-core-${lucene_version}.jar" />
+ <include name="lucene-core-${version}.jar" />
</fileset>
<fileset dir="../modules/analysis/build/common">
- <include name="lucene-analyzers-common-${lucene_version}.jar" />
+ <include name="lucene-analyzers-common-${version}.jar" />
+ </fileset>
+ <fileset dir="../modules/analysis/build/phonetic">
+ <include name="lucene-analyzers-phonetic-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/highlighter">
- <include name="lucene-highlighter-${lucene_version}.jar" />
+ <include name="lucene-highlighter-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/memory">
- <include name="lucene-memory-${lucene_version}.jar" />
+ <include name="lucene-memory-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/misc">
- <include name="lucene-misc-${lucene_version}.jar" />
+ <include name="lucene-misc-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/queries">
- <include name="lucene-queries-${lucene_version}.jar" />
+ <include name="lucene-queries-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/spatial">
- <include name="lucene-spatial-${lucene_version}.jar" />
+ <include name="lucene-spatial-${version}.jar" />
</fileset>
<fileset dir="../lucene/build/contrib/spellchecker">
- <include name="lucene-spellchecker-${lucene_version}.jar" />
+ <include name="lucene-spellchecker-${version}.jar" />
</fileset>
</copy>
</target>
@@ -214,6 +211,7 @@
<property name="lucene-compiled" value="true"/>
<subant target="default">
<fileset dir="../modules/analysis/common" includes="build.xml"/>
+ <fileset dir="../modules/analysis/phonetic" includes="build.xml"/>
<fileset dir="../lucene/contrib/highlighter" includes="build.xml"/>
<fileset dir="../lucene/contrib/memory" includes="build.xml"/>
<fileset dir="../lucene/contrib/misc" includes="build.xml"/>
@@ -347,7 +345,7 @@
<sequential>
<copy file="@{pom.xml}" tofile="${maven.build.dir}/@{pom.xml}">
<filterset begintoken="@" endtoken="@">
- <filter token="maven_version" value="${maven_version}"/>
+ <filter token="version" value="${version}"/>
</filterset>
</copy>
<artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-2"/>
@@ -391,7 +389,7 @@
<macrodef name="sign-maven-artifacts" description="Signs maven artifacts">
<attribute name="artifact.id"/>
<attribute name="prefix.dir" default="${maven.dist.prefix}"/>
- <attribute name="maven.version" default="${maven_version}"/>
+ <attribute name="maven.version" default="${version}"/>
<attribute name="gpg.passphrase"/>
<sequential>
<sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.jar" gpg.passphrase="@{gpg.passphrase}"/>
@@ -404,7 +402,7 @@
<macrodef name="sign-maven-dependency-artifacts" description="Signs maven artifacts">
<attribute name="artifact.id"/>
<attribute name="prefix.dir" default="${maven.dist.prefix}"/>
- <attribute name="maven.version" default="${maven_version}"/>
+ <attribute name="maven.version" default="${version}"/>
<attribute name="gpg.passphrase"/>
<sequential>
<sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.jar" gpg.passphrase="@{gpg.passphrase}"/>
Modified: lucene/dev/branches/realtime_search/solr/contrib/clustering/solr-clustering-pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/clustering/solr-clustering-pom.xml.template?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/clustering/solr-clustering-pom.xml.template (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/clustering/solr-clustering-pom.xml.template Thu Jul 22 19:34:35 2010
@@ -26,25 +26,25 @@
<parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-parent</artifactId>
- <version>@maven_version@</version>
+ <version>@version@</version>
</parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-clustering</artifactId>
<name>Apache Solr Clustering</name>
- <version>@maven_version@</version>
+ <version>@version@</version>
<description>Apache Solr Clustering</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
- <version>@maven_version@</version>
+ <version>@version@</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
- <version>@maven_version@</version>
+ <version>@version@</version>
</dependency>
<!-- Carrot -->
<dependency>
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/build.xml (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/build.xml Thu Jul 22 19:34:35 2010
@@ -55,6 +55,8 @@
<path refid="classpath.jetty" />
<pathelement path="target/classes" />
<pathelement path="target/test-classes" />
+ <pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
+ <pathelement location="${solr-path}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
<pathelement path="${java.class.path}"/>
</path>
@@ -65,6 +67,8 @@
<pathelement path="target/extras/classes" />
<pathelement path="target/test-classes" />
<pathelement path="target/extras/test-classes" />
+ <pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
+ <pathelement location="${solr-path}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
<pathelement path="${java.class.path}"/>
</path>
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template Thu Jul 22 19:34:35 2010
@@ -26,13 +26,13 @@
<parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-parent</artifactId>
- <version>@maven_version@</version>
+ <version>@version@</version>
</parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-dataimporthandler-extras</artifactId>
<name>Apache Solr DataImportHandler Extras</name>
- <version>@maven_version@</version>
+ <version>@version@</version>
<description>Apache Solr DataImportHandler Extras</description>
<packaging>jar</packaging>
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template Thu Jul 22 19:34:35 2010
@@ -26,13 +26,13 @@
<parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-parent</artifactId>
- <version>@maven_version@</version>
+ <version>@version@</version>
</parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-dataimporthandler</artifactId>
<name>Apache Solr DataImportHandler</name>
- <version>@maven_version@</version>
+ <version>@version@</version>
<description>Apache Solr DataImportHandler</description>
<packaging>jar</packaging>
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@
package org.apache.solr.handler.dataimport;
import junit.framework.Assert;
+
+import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Ignore;
import org.junit.Test;
@@ -45,7 +47,7 @@ import java.util.Map;
* @see org.apache.solr.handler.dataimport.MailEntityProcessor
* @since solr 1.4
*/
-public class TestMailEntityProcessor {
+public class TestMailEntityProcessor extends SolrTestCaseJ4 {
// Credentials
private static final String user = "user";
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java Thu Jul 22 19:34:35 2010
@@ -23,7 +23,7 @@ import org.junit.Before;
* @version $Id$
* @since solr 1.5
*/
-public class TestTikaEntityProcessor extends AbstractDataImportHandlerTest {
+public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
@Before
public void setUp() throws Exception {
Modified: lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java (original)
+++ lucene/dev/branches/realtime_search/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java Thu Jul 22 19:34:35 2010
@@ -16,6 +16,7 @@
*/
package org.apache.solr.handler.dataimport;
+import org.apache.solr.SolrTestCaseJ4;
import org.junit.Assert;
import org.junit.Test;
@@ -32,24 +33,24 @@ import java.util.Map;
* @version $Id$
* @since solr 1.3
*/
-public class TestCachedSqlEntityProcessor {
+public class TestCachedSqlEntityProcessor extends SolrTestCaseJ4 {
@Test
public void withoutWhereClause() {
List fields = new ArrayList();
- fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
- fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "id"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
- Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
+ Map<String, String> entityAttrs = AbstractDataImportHandlerTestCase.createMap(
"query", q);
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
- vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
- Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
+ vr.addNamespace("x", AbstractDataImportHandlerTestCase.createMap("id", 1));
+ Context context = AbstractDataImportHandlerTestCase.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc", "one"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null);
@@ -79,19 +80,19 @@ public class TestCachedSqlEntityProcesso
@Test
public void withoutWhereClauseWithTransformers() {
List fields = new ArrayList();
- fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
- fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "id"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
- Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
+ Map<String, String> entityAttrs = AbstractDataImportHandlerTestCase.createMap(
"query", q, "transformer", UppercaseTransformer.class.getName());
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
- vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
- Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
+ vr.addNamespace("x", AbstractDataImportHandlerTestCase.createMap("id", 1));
+ Context context = AbstractDataImportHandlerTestCase.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc", "one"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null);
@@ -122,19 +123,19 @@ public class TestCachedSqlEntityProcesso
@Test
public void withoutWhereClauseWithMultiRowTransformer() {
List fields = new ArrayList();
- fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
- fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "id"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
- Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
+ Map<String, String> entityAttrs = AbstractDataImportHandlerTestCase.createMap(
"query", q, "transformer", DoubleTransformer.class.getName());
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
- vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
- Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
+ vr.addNamespace("x", AbstractDataImportHandlerTestCase.createMap("id", 1));
+ Context context = AbstractDataImportHandlerTestCase.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc", "one"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null);
@@ -189,43 +190,43 @@ public class TestCachedSqlEntityProcesso
@Test
public void withWhereClause() {
List fields = new ArrayList();
- fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
- fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "id"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "desc"));
String q = "select * from x";
- Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
+ Map<String, String> entityAttrs = AbstractDataImportHandlerTestCase.createMap(
"query", q, EntityProcessorBase.CACHE_KEY,"id", EntityProcessorBase.CACHE_LOOKUP ,"x.id");
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
- Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0);
+ Map xNamespace = AbstractDataImportHandlerTestCase.createMap("id", 0);
vr.addNamespace("x", xNamespace);
- Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
+ Context context = AbstractDataImportHandlerTestCase.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
doWhereTest(q, context, ds, xNamespace);
}
@Test
public void withKeyAndLookup() {
List fields = new ArrayList();
- fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
- fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "id"));
+ fields.add(AbstractDataImportHandlerTestCase.createMap("column", "desc"));
String q = "select * from x";
- Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap("query", q, "where", "id=x.id");
+ Map<String, String> entityAttrs = AbstractDataImportHandlerTestCase.createMap("query", q, "where", "id=x.id");
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
- Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0);
+ Map xNamespace = AbstractDataImportHandlerTestCase.createMap("id", 0);
vr.addNamespace("x", xNamespace);
- Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
+ Context context = AbstractDataImportHandlerTestCase.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs);
doWhereTest(q, context, ds, xNamespace);
}
private void doWhereTest(String q, Context context, MockDataSource ds, Map xNamespace) {
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
- rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc", "two"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc",
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 1, "desc", "one"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 2, "desc", "two"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 2, "desc",
"another two"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "three"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "another three"));
- rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "another another three"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 3, "desc", "three"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 3, "desc", "another three"));
+ rows.add(AbstractDataImportHandlerTestCase.createMap("id", 3, "desc", "another another three"));
MockDataSource.setIterator(q, rows.iterator());
EntityProcessor csep = new EntityProcessorWrapper(new CachedSqlEntityProcessor(), null);
csep.init(context);