You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/08/09 12:21:07 UTC
svn commit: r1371142 [23/32] - in /lucene/dev/branches/lucene3312: ./
dev-tools/ dev-tools/eclipse/ dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ dev-tools/maven/lucene/analysis/ku...
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Thu Aug 9 10:20:53 2012
@@ -40,7 +40,6 @@ org.apache.lucene.analysis.en.PorterStem
org.apache.lucene.analysis.es.SpanishLightStemFilterFactory
org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory
org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory
-org.apache.lucene.analysis.fr.ElisionFilterFactory
org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory
org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory
org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory
@@ -88,3 +87,4 @@ org.apache.lucene.analysis.sv.SwedishLig
org.apache.lucene.analysis.synonym.SynonymFilterFactory
org.apache.lucene.analysis.th.ThaiWordFilterFactory
org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
+org.apache.lucene.analysis.util.ElisionFilterFactory
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java Thu Aug 9 10:20:53 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.Reader;
+import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends
}
};
+ Analyzer unibiAnalyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t,
+ new CJKBigramFilter(t, 0xff, true));
+ }
+ };
+
public void testHuge() throws Exception {
assertAnalyzesTo(analyzer, "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã"
+ "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã"
@@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends
}
};
assertAnalyzesTo(a, "å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
- new String[] { "å¤", "ã", "ã®", "å¦ç", "ã", "試é¨", "ã«", "è½", "ã¡", "ã" });
+ new String[] { "å¤", "ã", "ã®", "å¦ç", "ã", "試é¨", "ã«", "è½", "ã¡", "ã" },
+ new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
+ new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
+ new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
+ "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
+ new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
+ }
+
+ public void testAllScripts() throws Exception {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t,
+ new CJKBigramFilter(t, 0xff, false));
+ }
+ };
+ assertAnalyzesTo(a, "å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
+ new String[] { "å¤ã", "ãã®", "ã®å¦", "å¦ç", "çã", "ã試", "試é¨", "é¨ã«", "ã«è½", "è½ã¡", "ã¡ã" });
+ }
+
+ public void testUnigramsAndBigramsAllScripts() throws Exception {
+ assertAnalyzesTo(unibiAnalyzer, "å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
+ new String[] {
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç",
+ "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«",
+ "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã"
+ },
+ new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
+ new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
+ new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
+ "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
+ "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
+ new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
+ );
+ }
+
+ public void testUnigramsAndBigramsHanOnly() throws Exception {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
+ }
+ };
+ assertAnalyzesTo(a, "å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
+ new String[] { "å¤", "ã", "ã®", "å¦", "å¦ç", "ç", "ã", "試", "試é¨", "é¨", "ã«", "è½", "ã¡", "ã" },
+ new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
+ new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
+ new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
+ "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
+ "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
+ new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
+ new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
+ }
+
+ public void testUnigramsAndBigramsHuge() throws Exception {
+ assertAnalyzesTo(unibiAnalyzer, "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã"
+ + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã"
+ + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã" + "å¤ãã®å¦çã試é¨ã«è½ã¡ã",
+ new String[] {
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã", "ãå¤",
+ "å¤", "å¤ã", "ã", "ãã®", "ã®", "ã®å¦", "å¦", "å¦ç", "ç", "çã", "ã", "ã試", "試", "試é¨", "é¨", "é¨ã«", "ã«", "ã«è½", "è½", "è½ã¡", "ã¡", "ã¡ã", "ã"
+ }
+ );
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomUnibiStrings() throws Exception {
+ checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomUnibiHugeStrings() throws Exception {
+ Random random = random();
+ checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
}
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory
assertTokenStreamContents(stream,
new String[] { "å¤", "ã", "ã®", "å¦ç", "ã", "試é¨", "ã«", "è½", "ã¡", "ã" });
}
+
+ public void testHanOnlyUnigrams() throws Exception {
+ Reader reader = new StringReader("å¤ãã®å¦çã試é¨ã«è½ã¡ãã");
+ CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
+ Map<String,String> args = new HashMap<String,String>();
+ args.put("hiragana", "false");
+ args.put("outputUnigrams", "true");
+ factory.init(args);
+ TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
+ assertTokenStreamContents(stream,
+ new String[] { "å¤", "ã", "ã®", "å¦", "å¦ç", "ç", "ã", "試", "試é¨", "é¨", "ã«", "è½", "ã¡", "ã" });
+ }
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.io.StringReader;
@@ -39,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@@ -89,7 +89,7 @@ public class TestCommonGramsFilterFactor
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.io.StringReader;
@@ -39,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@@ -89,7 +89,7 @@ public class TestCommonGramsQueryFilterF
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
/**
@@ -40,7 +40,7 @@ public class TestDictionaryCompoundWordT
Reader reader = new StringReader("I like to play softball");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("dictionary", "compoundDictionary.txt");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
/**
@@ -40,7 +40,7 @@ public class TestHyphenationCompoundWord
Reader reader = new StringReader("min veninde som er lidt af en læsehest");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("dictionary", "da_compoundDictionary.txt");
@@ -64,7 +64,7 @@ public class TestHyphenationCompoundWord
Reader reader = new StringReader("basketballkurv");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("minSubwordSize", "2");
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Thu Aug 9 10:20:53 2012
@@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"content",
- new BytesRef("another"),
- false);
+ new BytesRef("another"));
assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Thu Aug 9 10:20:53 2012
@@ -103,7 +103,7 @@ public class TestKeywordAnalyzer extends
new BytesRef("Q36"),
MultiFields.getLiveDocs(reader),
null,
- false);
+ 0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
td = _TestUtil.docs(random(),
reader,
@@ -111,7 +111,7 @@ public class TestKeywordAnalyzer extends
new BytesRef("Q37"),
MultiFields.getLiveDocs(reader),
null,
- false);
+ 0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.util.Map;
@@ -32,7 +32,7 @@ import java.util.HashMap;
public class TestStopFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
StopFilterFactory factory = new StopFilterFactory();
Map<String, String> args = new HashMap<String, String>();
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.NumericTokenStream;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.junit.Test;
@@ -34,7 +34,7 @@ public class TestTypeTokenFilterFactory
@Test
public void testInform() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt");
@@ -94,7 +94,7 @@ public class TestTypeTokenFilterFactory
args.put("enablePositionIncrements", "false");
typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
typeTokenFilterFactory.init(args);
- typeTokenFilterFactory.inform(new ResourceAsStreamResourceLoader(getClass()));
+ typeTokenFilterFactory.inform(new ClasspathResourceLoader(getClass()));
fail("not supplying 'types' parameter should cause an IllegalArgumentException");
} catch (IllegalArgumentException e) {
// everything ok
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -25,7 +25,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Simple tests to ensure the Hunspell stemmer loads from factory
@@ -38,7 +38,7 @@ public class TestHunspellStemFilterFacto
args.put("affix", "test.aff");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
factory.init(args);
- factory.inform(new ResourceAsStreamResourceLoader(getClass()));
+ factory.inform(new ClasspathResourceLoader(getClass()));
Reader reader = new StringReader("abc");
TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.util.Map;
@@ -32,7 +32,7 @@ import java.util.HashMap;
public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+ ResourceLoader loader = new ClasspathResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
KeepWordFilterFactory factory = new KeepWordFilterFactory();
Map<String, String> args = new HashMap<String, String>();
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Thu Aug 9 10:20:53 2012
@@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter exte
TermsEnum termsEnum = vector.iterator(null);
termsEnum.next();
assertEquals(2, termsEnum.totalTermFreq());
- DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true);
+ DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(2, positions.freq());
positions.nextPosition();
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Thu Aug 9 10:20:53 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -116,4 +117,21 @@ public class TestSnowball extends BaseTo
checkOneTermReuse(a, "", "");
}
}
+
+ public void testRandomStrings() throws IOException {
+ for (String lang : SNOWBALL_LANGS) {
+ checkRandomStrings(lang);
+ }
+ }
+
+ public void checkRandomStrings(final String snowballLanguage) throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer t = new MockTokenizer(reader);
+ return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+ }
+ };
+ checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilter;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
public class TestSynonymFilterFactory extends BaseTokenStreamTestCase {
@@ -36,7 +36,7 @@ public class TestSynonymFilterFactory ex
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
factory.init(args);
- factory.inform(new ResourceAsStreamResourceLoader(getClass()));
+ factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
assertTrue(ts instanceof SynonymFilter);
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java Thu Aug 9 10:20:53 2012
@@ -22,7 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Tests for {@link StempelPolishStemFilterFactory}
@@ -31,7 +31,7 @@ public class TestStempelPolishStemFilter
public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
- factory.inform(new ResourceAsStreamResourceLoader(getClass()));
+ factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts,
new String[] { "student", "student" });
Modified: lucene/dev/branches/lucene3312/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (original)
+++ lucene/dev/branches/lucene3312/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java Thu Aug 9 10:20:53 2012
@@ -331,7 +331,9 @@ public class TaskSequence extends PerfTa
// Forwards top request to children
if (runningParallelTasks != null) {
for(ParallelTask t : runningParallelTasks) {
- t.task.stopNow();
+ if (t != null) {
+ t.task.stopNow();
+ }
}
}
}
@@ -355,6 +357,12 @@ public class TaskSequence extends PerfTa
// run threads
startThreads(t);
+ if (stopNow) {
+ for (ParallelTask task : t) {
+ task.task.stopNow();
+ }
+ }
+
// wait for all threads to complete
int count = 0;
for (int i = 0; i < t.length; i++) {
Modified: lucene/dev/branches/lucene3312/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/lucene3312/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Thu Aug 9 10:20:53 2012
@@ -156,7 +156,12 @@ public class TestPerfTasksLogic extends
CountingSearchTestTask.numSearches = 0;
execBenchmark(algLines);
- assertTrue(CountingSearchTestTask.numSearches > 0);
+
+ // NOTE: cannot assert this, because on a super-slow
+ // system, it could be after waiting 0.5 seconds that
+ // the search threads hadn't yet succeeded in starting
+ // up and then they start up and do no searching:
+ //assertTrue(CountingSearchTestTask.numSearches > 0);
}
public void testHighlighting() throws Exception {
@@ -500,7 +505,7 @@ public class TestPerfTasksLogic extends
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(termsEnum.next() != null) {
- docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, true);
+ docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totalTokenCount2 += docs.freq();
}
Modified: lucene/dev/branches/lucene3312/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/build.xml?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/build.xml Thu Aug 9 10:20:53 2012
@@ -35,10 +35,9 @@
MIGRATE.txt,JRE_VERSION_MIGRATION.txt,
CHANGES.txt,
**/lib/*.jar,
- **/lib/*LICENSE*.txt,
- **/lib/*NOTICE*.txt,
+ licenses/**,
*/docs/,**/README*"
- excludes="build/**,site/**"
+ excludes="build/**,site/**,tools/**"
/>
@@ -572,4 +571,8 @@
</sequential>
</target>
+ <target name="jar-checksums" depends="clean-jars,resolve">
+ <jar-checksum-macro srcdir="${common.dir}" dstdir="${common.dir}/licenses"/>
+ </target>
+
</project>
Modified: lucene/dev/branches/lucene3312/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/common-build.xml?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/common-build.xml Thu Aug 9 10:20:53 2012
@@ -1567,6 +1567,43 @@ ${tests-output}/junit4-*.suites - pe
</sequential>
</macrodef>
+ <macrodef name="jar-checksum-macro">
+ <attribute name="srcdir"/>
+ <attribute name="dstdir"/>
+ <sequential>
+ <delete>
+ <fileset dir="@{dstdir}">
+ <include name="**/*.jar.sha1"/>
+ </fileset>
+ </delete>
+
+ <!-- checksum task does not have a flatten=true -->
+ <tempfile property="jar-checksum.temp.dir"/>
+ <mkdir dir="${jar-checksum.temp.dir}"/>
+ <copy todir="${jar-checksum.temp.dir}" flatten="true">
+ <fileset dir="@{srcdir}">
+ <include name="**/*.jar"/>
+ <!-- TODO: make these excludes a macro attribute instead of a hardcoded set -->
+ <exclude name="build/**"/>
+ <exclude name="dist/**"/>
+ <exclude name="package/**"/>
+ <exclude name="example/exampledocs/**"/>
+ </fileset>
+ </copy>
+
+ <checksum algorithm="SHA1" fileext=".sha1" todir="@{dstdir}">
+ <fileset dir="${jar-checksum.temp.dir}"/>
+ </checksum>
+
+ <delete dir="${jar-checksum.temp.dir}"/>
+
+ <fixcrlf
+ srcdir="@{dstdir}"
+ includes="**/*.jar.sha1"
+ eol="lf" fixlast="true" encoding="US-ASCII" />
+ </sequential>
+ </macrodef>
+
<macrodef name="sign-artifacts-macro">
<attribute name="artifacts.dir"/>
<sequential>
@@ -1701,7 +1738,7 @@ ${tests-output}/junit4-*.suites - pe
<attribute name="dir"/>
<attribute name="level" default="class"/>
<sequential>
- <exec dir="." executable="${python.exe}" failonerror="true">
+ <exec dir="." executable="${python32.exe}" failonerror="true">
<arg value="${dev-tools.dir}/scripts/checkJavaDocs.py"/>
<arg value="@{dir}"/>
<arg value="@{level}"/>
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html Thu Aug 9 10:20:53 2012
@@ -480,7 +480,7 @@ public class MyAnalyzer extends Analyzer
System.out.println(termAtt.toString());
}
- stream.end()
+ stream.end();
} finally {
stream.close();
}
@@ -509,7 +509,7 @@ easily by adding a LengthFilter to the c
{@literal @Override}
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
- TokenStream result = new LengthFilter(source, 3, Integer.MAX_VALUE);
+ TokenStream result = new LengthFilter(true, source, 3, Integer.MAX_VALUE);
return new TokenStreamComponents(source, result);
}
</pre>
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java Thu Aug 9 10:20:53 2012
@@ -683,31 +683,22 @@ public class BlockTermsReader extends Fi
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
//System.out.println("BTR.docs this=" + this);
- if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- return null;
- }
decodeMetaData();
//System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
- return postingsReader.docs(fieldInfo, state, liveDocs, reuse, needsFreqs);
+ return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
- if (needsOffsets &&
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- // Offsets were not indexed:
- return null;
- }
-
decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, needsOffsets);
+ return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Thu Aug 9 10:20:53 2012
@@ -893,29 +893,20 @@ public class BlockTreeTermsReader extend
}
@Override
- public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
currentFrame.decodeMetaData();
- if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- return null;
- }
- return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, needsFreqs);
+ return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
- if (needsOffsets &&
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- // Offsets were not indexed:
- return null;
- }
-
currentFrame.decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, needsOffsets);
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
private int getState() {
@@ -2121,10 +2112,7 @@ public class BlockTreeTermsReader extend
}
@Override
- public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
- if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- return null;
- }
+ public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
@@ -2133,25 +2121,19 @@ public class BlockTreeTermsReader extend
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
- return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, needsFreqs);
+ return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
- if (needsOffsets &&
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- // Offsets were not indexed:
- return null;
- }
-
assert !eof;
currentFrame.decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, needsOffsets);
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java Thu Aug 9 10:20:53 2012
@@ -53,7 +53,13 @@ public abstract class PostingsFormat imp
/** Reads a segment. NOTE: by the time this call
* returns, it must hold open any files it will need to
- * use; else, those files may be deleted. */
+ * use; else, those files may be deleted.
+ * Additionally, required files may be deleted while this call is
+ * executing, before there is a chance to open them; in that case the
+ * implementation should throw an {@code IOException}. Such
+ * IOExceptions are expected, and automatically cause a retry of the
+ * segment opening logic with the newly revised segments.
+ */
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
@Override
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java Thu Aug 9 10:20:53 2012
@@ -51,12 +51,12 @@ public abstract class PostingsReaderBase
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException;
+ public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
- boolean needsOffsets) throws IOException;
+ int flags) throws IOException;
public abstract void close() throws IOException;
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Thu Aug 9 10:20:53 2012
@@ -224,16 +224,9 @@ public abstract class TermVectorsWriter
// TODO: we need a "query" API where we can ask (via
// flex API) what this term was indexed with...
// Both positions & offsets:
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true);
- final boolean hasOffsets;
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
+ boolean hasOffsets = false;
boolean hasPositions = false;
- if (docsAndPositionsEnum == null) {
- // Fallback: no offsets
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false);
- hasOffsets = false;
- } else {
- hasOffsets = true;
- }
if (docsAndPositionsEnum != null) {
final int docID = docsAndPositionsEnum.nextDoc();
@@ -242,23 +235,19 @@ public abstract class TermVectorsWriter
for(int posUpto=0; posUpto<freq; posUpto++) {
final int pos = docsAndPositionsEnum.nextPosition();
+ final int startOffset = docsAndPositionsEnum.startOffset();
+ final int endOffset = docsAndPositionsEnum.endOffset();
if (!startedField) {
assert numTerms > 0;
hasPositions = pos != -1;
+ hasOffsets = startOffset != -1;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
}
- final int startOffset;
- final int endOffset;
if (hasOffsets) {
- startOffset = docsAndPositionsEnum.startOffset();
- endOffset = docsAndPositionsEnum.endOffset();
assert startOffset != -1;
assert endOffset != -1;
- } else {
- startOffset = -1;
- endOffset = -1;
}
assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset);
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java Thu Aug 9 10:20:53 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.FieldInfo; // javadocs
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.TermsEnum;
@@ -97,7 +98,7 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
- docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn, false);
+ docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn, 0);
if (docsEnumIn != null) {
docsEnum.reset(docsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
@@ -125,7 +126,7 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
- docsAndFreqsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsAndFreqsEnumIn, true);
+ docsAndFreqsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsAndFreqsEnumIn);
assert docsAndFreqsEnumIn != null;
docsAndFreqsEnum.reset(docsAndFreqsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
@@ -150,7 +151,7 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
- postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, false);
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
@@ -184,7 +185,7 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
- postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, true);
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Thu Aug 9 10:20:53 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene4
*/
import java.io.IOException;
+import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
@@ -218,7 +219,7 @@ public class Lucene40PostingsReader exte
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (canReuse(reuse, liveDocs)) {
// if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
@@ -250,11 +251,14 @@ public class Lucene40PostingsReader exte
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
- DocsAndPositionsEnum reuse, boolean needsOffsets)
+ DocsAndPositionsEnum reuse, int flags)
throws IOException {
boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ // TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS
+ // isn't passed?
+
// TODO: refactor
if (fieldInfo.hasPayloads() || hasOffsets) {
SegmentFullPositionsEnum docsEnum;
@@ -348,13 +352,16 @@ public class Lucene40PostingsReader exte
start = -1;
count = 0;
+ freq = 1;
+ if (indexOmitsTF) {
+ Arrays.fill(freqs, 1);
+ }
maxBufferedDocId = -1;
return this;
}
@Override
public final int freq() {
- assert !indexOmitsTF;
return freq;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Thu Aug 9 10:20:53 2012
@@ -478,7 +478,7 @@ public class Lucene40TermVectorsReader e
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs /* ignored */) throws IOException {
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags /* ignored */) throws IOException {
TVDocsEnum docsEnum;
if (reuse != null && reuse instanceof TVDocsEnum) {
docsEnum = (TVDocsEnum) reuse;
@@ -490,10 +490,7 @@ public class Lucene40TermVectorsReader e
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
- if (needsOffsets && !storeOffsets) {
- return null;
- }
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (!storePositions && !storeOffsets) {
return null;
@@ -640,14 +637,20 @@ public class Lucene40TermVectorsReader e
@Override
public int startOffset() {
- assert startOffsets != null;
- return startOffsets[nextPos-1];
+ if (startOffsets == null) {
+ return -1;
+ } else {
+ return startOffsets[nextPos-1];
+ }
}
@Override
public int endOffset() {
- assert endOffsets != null;
- return endOffsets[nextPos-1];
+ if (endOffsets == null) {
+ return -1;
+ } else {
+ return endOffsets[nextPos-1];
+ }
}
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Thu Aug 9 10:20:53 2012
@@ -314,9 +314,9 @@ public class DirectPostingsFormat extend
termOffsets[count+1] = termOffset;
if (hasPos) {
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum, hasOffsets);
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
} else {
- docsEnum = termsEnum.docs(null, docsEnum, hasFreq);
+ docsEnum = termsEnum.docs(null, docsEnum);
}
final TermAndSkip ent;
@@ -781,11 +781,7 @@ public class DirectPostingsFormat extend
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
- if (needsFreqs && !hasFreq) {
- return null;
- }
-
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
// TODO: implement reuse, something like Pulsing:
// it's hairy!
@@ -858,13 +854,10 @@ public class DirectPostingsFormat extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
if (!hasPos) {
return null;
}
- if (needsOffsets && !hasOffsets) {
- return null;
- }
// TODO: implement reuse, something like Pulsing:
// it's hairy!
@@ -1384,11 +1377,7 @@ public class DirectPostingsFormat extend
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
- if (needsFreqs && !hasFreq) {
- return null;
- }
-
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
// TODO: implement reuse, something like Pulsing:
// it's hairy!
@@ -1420,13 +1409,10 @@ public class DirectPostingsFormat extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
if (!hasPos) {
return null;
}
- if (needsOffsets && !hasOffsets) {
- return null;
- }
// TODO: implement reuse, something like Pulsing:
// it's hairy!
@@ -1507,7 +1493,6 @@ public class DirectPostingsFormat extend
@Override
public int freq() {
- assert false;
return 1;
}
@@ -1882,7 +1867,11 @@ public class DirectPostingsFormat extend
@Override
public int freq() {
- return freqs[upto];
+ if (freqs == null) {
+ return 1;
+ } else {
+ return freqs[upto];
+ }
}
@Override
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Thu Aug 9 10:20:53 2012
@@ -344,6 +344,7 @@ public class MemoryPostingsFormat extend
docID = -1;
accum = 0;
docUpto = 0;
+ freq = 1;
payloadLen = 0;
this.numDocs = numDocs;
return this;
@@ -428,7 +429,6 @@ public class MemoryPostingsFormat extend
@Override
public int freq() {
- assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
}
@@ -696,13 +696,11 @@ public class MemoryPostingsFormat extend
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
decodeMetaData();
FSTDocsEnum docsEnum;
- if (needsFreqs && field.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- return null;
- } else if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
+ if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
} else {
docsEnum = (FSTDocsEnum) reuse;
@@ -714,13 +712,9 @@ public class MemoryPostingsFormat extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- if (needsOffsets && !hasOffsets) {
- return null; // not available
- }
-
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Thu Aug 9 10:20:53 2012
@@ -178,7 +178,7 @@ public class PulsingPostingsReader exten
}
@Override
- public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
@@ -202,18 +202,18 @@ public class PulsingPostingsReader exten
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsEnum) {
- DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), needsFreqs);
+ DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), flags);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
- return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, needsFreqs);
+ return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, flags);
}
}
}
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse,
- boolean needsOffsets) throws IOException {
+ int flags) throws IOException {
final PulsingTermState termState = (PulsingTermState) _termState;
@@ -240,11 +240,11 @@ public class PulsingPostingsReader exten
} else {
if (reuse instanceof PulsingDocsAndPositionsEnum) {
DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
- needsOffsets);
+ flags);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
- return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, needsOffsets);
+ return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, flags);
}
}
}
@@ -282,6 +282,7 @@ public class PulsingPostingsReader exten
postings.reset(postingsBytes, 0, termState.postingsSize);
docID = -1;
accum = 0;
+ freq = 1;
payloadLength = 0;
this.liveDocs = liveDocs;
return this;
@@ -349,7 +350,6 @@ public class PulsingPostingsReader exten
@Override
public int freq() throws IOException {
- assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Thu Aug 9 10:20:53 2012
@@ -258,7 +258,7 @@ public class SepPostingsReader extends P
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
@@ -278,13 +278,9 @@ public class SepPostingsReader extends P
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs,
- DocsAndPositionsEnum reuse, boolean needsOffsets)
+ DocsAndPositionsEnum reuse, int flags)
throws IOException {
- if (needsOffsets) {
- return null;
- }
-
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
@@ -370,6 +366,7 @@ public class SepPostingsReader extends P
count = 0;
doc = -1;
accum = 0;
+ freq = 1;
skipped = false;
return this;
@@ -403,7 +400,6 @@ public class SepPostingsReader extends P
@Override
public int freq() throws IOException {
- assert !omitTF;
return freq;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Thu Aug 9 10:20:53 2012
@@ -194,33 +194,24 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
- if (needsFreqs && indexOptions == IndexOptions.DOCS_ONLY) {
- return null;
- }
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
SimpleTextDocsEnum docsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
} else {
docsEnum = new SimpleTextDocsEnum();
}
- return docsEnum.reset(docsStart, liveDocs, !needsFreqs);
+ return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed
return null;
}
- if (needsOffsets &&
- indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- // Offsets were not indexed
- return null;
- }
-
SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
@@ -260,6 +251,7 @@ class SimpleTextFieldsReader extends Fie
in.seek(fp);
this.omitTF = omitTF;
docID = -1;
+ tf = 1;
return this;
}
@@ -270,7 +262,6 @@ class SimpleTextFieldsReader extends Fie
@Override
public int freq() throws IOException {
- assert !omitTF;
return tf;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Thu Aug 9 10:20:53 2012
@@ -357,22 +357,19 @@ public class SimpleTextTermVectorsReader
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
// TODO: reuse
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
- e.reset(liveDocs, needsFreqs ? current.getValue().freq : -1);
+ e.reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
return e;
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
- if (needsOffsets && (postings.startOffsets == null || postings.endOffsets == null)) {
- return null;
- }
// TODO: reuse
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
@@ -507,12 +504,20 @@ public class SimpleTextTermVectorsReader
@Override
public int startOffset() {
- return startOffsets[nextPos-1];
+ if (startOffsets == null) {
+ return -1;
+ } else {
+ return startOffsets[nextPos-1];
+ }
}
@Override
public int endOffset() {
- return endOffsets[nextPos-1];
+ if (endOffsets == null) {
+ return -1;
+ } else {
+ return endOffsets[nextPos-1];
+ }
}
}
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/Field.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/Field.java Thu Aug 9 10:20:53 2012
@@ -378,6 +378,11 @@ public class Field implements IndexableF
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
+ if (boost != 1.0f) {
+ if (type.indexed() == false || type.omitNorms()) {
+ throw new IllegalArgumentException("You cannot set an index-time boost on an unindexed field, or one that omits norms");
+ }
+ }
this.boost = boost;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StoredField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StoredField.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StoredField.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StoredField.java Thu Aug 9 10:20:53 2012
@@ -61,6 +61,7 @@ public class StoredField extends Field {
super(name, value, type);
}
+ // TODO: not great but maybe not a big problem?
public StoredField(String name, int value) {
super(name, TYPE);
fieldsData = value;
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StringField.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/StringField.java Thu Aug 9 10:20:53 2012
@@ -54,9 +54,4 @@ public final class StringField extends F
public StringField(String name, String value, Store stored) {
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}
-
- @Override
- public String stringValue() {
- return (fieldsData == null) ? null : fieldsData.toString();
- }
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/TextField.java?rev=1371142&r1=1371141&r2=1371142&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/document/TextField.java Thu Aug 9 10:20:53 2012
@@ -46,9 +46,9 @@ public final class TextField extends Fie
// TODO: add sugar for term vectors...?
- /** Creates a new TextField with Reader value. */
- public TextField(String name, Reader reader, Store store) {
- super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
+ /** Creates a new un-stored TextField with Reader value. */
+ public TextField(String name, Reader reader) {
+ super(name, reader, TYPE_NOT_STORED);
}
/** Creates a new TextField with String value. */