You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC
svn commit: r1487777 [32/50] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...
Modified: lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java Thu May 30 07:53:18 2013
@@ -26,9 +26,10 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
+import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -40,12 +41,12 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
-import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.fst.Util.MinResult;
+import org.apache.lucene.util.fst.Util;
/**
* Suggester based on a weighted FST: it first traverses the prefix,
@@ -93,6 +94,9 @@ public class WFSTCompletionLookup extend
@Override
public void build(TermFreqIterator iterator) throws IOException {
+ if (iterator instanceof TermFreqPayloadIterator) {
+ throw new IllegalArgumentException("this suggester doesn't support payloads");
+ }
BytesRef scratch = new BytesRef();
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
IntsRef scratchInts = new IntsRef();
Modified: lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Thu May 30 07:53:18 2013
@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
@@ -53,6 +54,9 @@ public class JaspellLookup extends Looku
@Override
public void build(TermFreqIterator tfit) throws IOException {
+ if (tfit instanceof TermFreqPayloadIterator) {
+ throw new IllegalArgumentException("this suggester doesn't support payloads");
+ }
if (tfit.getComparator() != null) {
// make sure it's unsorted
// WTF - this could result in yet another sorted iteration....
Modified: lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java Thu May 30 07:53:18 2013
@@ -25,9 +25,10 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
@@ -51,6 +52,9 @@ public class TSTLookup extends Lookup {
@Override
public void build(TermFreqIterator tfit) throws IOException {
+ if (tfit instanceof TermFreqPayloadIterator) {
+ throw new IllegalArgumentException("this suggester doesn't support payloads");
+ }
root = new TernaryTreeNode();
// buffer first
if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
Modified: lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java Thu May 30 07:53:18 2013
@@ -39,11 +39,12 @@ public class TestBytesRefArray extends L
}
int entries = atLeast(500);
BytesRef spare = new BytesRef();
+ int initSize = list.size();
for (int i = 0; i < entries; i++) {
String randomRealisticUnicodeString = _TestUtil
.randomRealisticUnicodeString(random);
spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
+ assertEquals(i+initSize, list.append(spare));
stringList.add(randomRealisticUnicodeString);
}
for (int i = 0; i < entries; i++) {
@@ -81,11 +82,12 @@ public class TestBytesRefArray extends L
}
int entries = atLeast(500);
BytesRef spare = new BytesRef();
+ final int initSize = list.size();
for (int i = 0; i < entries; i++) {
String randomRealisticUnicodeString = _TestUtil
.randomRealisticUnicodeString(random);
spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
+ assertEquals(initSize + i, list.append(spare));
stringList.add(randomRealisticUnicodeString);
}
Modified: lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Thu May 30 07:53:18 2013
@@ -53,6 +53,8 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.search.suggest.TermFreqPayload;
+import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -103,6 +105,56 @@ public class AnalyzingSuggesterTest exte
assertEquals(6, results.get(2).value, 0.01F);
}
+ public void testKeywordWithPayloads() throws Exception {
+ TermFreqPayload keys[] = new TermFreqPayload[] {
+ new TermFreqPayload("foo", 50, new BytesRef("hello")),
+ new TermFreqPayload("bar", 10, new BytesRef("goodbye")),
+ new TermFreqPayload("barbar", 12, new BytesRef("thank you")),
+ new TermFreqPayload("barbara", 6, new BytesRef("for all the fish"))
+ };
+
+ AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
+
+ // top N of 2, but only foo is available
+ List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
+ assertEquals(1, results.size());
+ assertEquals("foo", results.get(0).key.toString());
+ assertEquals(50, results.get(0).value, 0.01F);
+ assertEquals(new BytesRef("hello"), results.get(0).payload);
+
+ // top N of 1 for 'bar': we return this even though
+ // barbar is higher because exactFirst is enabled:
+ results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random()), false, 1);
+ assertEquals(1, results.size());
+ assertEquals("bar", results.get(0).key.toString());
+ assertEquals(10, results.get(0).value, 0.01F);
+ assertEquals(new BytesRef("goodbye"), results.get(0).payload);
+
+    // top N of 2 for 'b'
+ results = suggester.lookup(_TestUtil.stringToCharSequence("b", random()), false, 2);
+ assertEquals(2, results.size());
+ assertEquals("barbar", results.get(0).key.toString());
+ assertEquals(12, results.get(0).value, 0.01F);
+ assertEquals(new BytesRef("thank you"), results.get(0).payload);
+ assertEquals("bar", results.get(1).key.toString());
+ assertEquals(10, results.get(1).value, 0.01F);
+ assertEquals(new BytesRef("goodbye"), results.get(1).payload);
+
+ // top N of 3 for 'ba'
+ results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random()), false, 3);
+ assertEquals(3, results.size());
+ assertEquals("barbar", results.get(0).key.toString());
+ assertEquals(12, results.get(0).value, 0.01F);
+ assertEquals(new BytesRef("thank you"), results.get(0).payload);
+ assertEquals("bar", results.get(1).key.toString());
+ assertEquals(10, results.get(1).value, 0.01F);
+ assertEquals(new BytesRef("goodbye"), results.get(1).payload);
+ assertEquals("barbara", results.get(2).key.toString());
+ assertEquals(6, results.get(2).value, 0.01F);
+ assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
+ }
+
// TODO: more tests
/**
* basic "standardanalyzer" test with stopword removal
@@ -112,8 +164,9 @@ public class AnalyzingSuggesterTest exte
new TermFreq("the ghost of christmas past", 50),
};
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
+ suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
@@ -135,7 +188,7 @@ public class AnalyzingSuggesterTest exte
}
public void testEmpty() throws Exception {
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
@@ -435,11 +488,13 @@ public class AnalyzingSuggesterTest exte
public final String surfaceForm;
public final String analyzedForm;
public final long weight;
+ public final BytesRef payload;
- public TermFreq2(String surfaceForm, String analyzedForm, long weight) {
+ public TermFreq2(String surfaceForm, String analyzedForm, long weight, BytesRef payload) {
this.surfaceForm = surfaceForm;
this.analyzedForm = analyzedForm;
this.weight = weight;
+ this.payload = payload;
}
@Override
@@ -549,7 +604,15 @@ public class AnalyzingSuggesterTest exte
final TreeSet<String> allPrefixes = new TreeSet<String>();
final Set<String> seen = new HashSet<String>();
- TermFreq[] keys = new TermFreq[numQueries];
+ boolean doPayloads = random().nextBoolean();
+
+ TermFreq[] keys = null;
+ TermFreqPayload[] payloadKeys = null;
+ if (doPayloads) {
+ payloadKeys = new TermFreqPayload[numQueries];
+ } else {
+ keys = new TermFreq[numQueries];
+ }
boolean preserveSep = random().nextBoolean();
@@ -567,6 +630,7 @@ public class AnalyzingSuggesterTest exte
while(true) {
key = "";
analyzedKey = "";
+ boolean lastRemoved = false;
for(int token=0;token < numTokens;token++) {
String s;
while (true) {
@@ -582,10 +646,12 @@ public class AnalyzingSuggesterTest exte
}
key += s;
if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
+ lastRemoved = true;
if (preserveSep && preserveHoles) {
analyzedKey += SEP;
}
} else {
+ lastRemoved = false;
analyzedKey += s;
}
break;
@@ -595,6 +661,10 @@ public class AnalyzingSuggesterTest exte
analyzedKey = analyzedKey.replaceAll("(^|" + SEP + ")" + SEP + "$", "");
+ if (preserveSep && lastRemoved) {
+ analyzedKey += SEP;
+ }
+
// Don't add same surface form more than once:
if (!seen.contains(key)) {
seen.add(key);
@@ -607,9 +677,18 @@ public class AnalyzingSuggesterTest exte
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
- keys[i] = new TermFreq(key, weight);
+ BytesRef payload;
+ if (doPayloads) {
+ byte[] bytes = new byte[random().nextInt(10)];
+ random().nextBytes(bytes);
+ payload = new BytesRef(bytes);
+ payloadKeys[i] = new TermFreqPayload(key, weight, payload);
+ } else {
+ keys[i] = new TermFreq(key, weight);
+ payload = null;
+ }
- slowCompletor.add(new TermFreq2(key, analyzedKey, weight));
+ slowCompletor.add(new TermFreq2(key, analyzedKey, weight, payload));
}
if (VERBOSE) {
@@ -625,7 +704,11 @@ public class AnalyzingSuggesterTest exte
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(keys));
+ if (doPayloads) {
+ suggester.build(new TermFreqPayloadArrayIterator(payloadKeys));
+ } else {
+ suggester.build(new TermFreqArrayIterator(keys));
+ }
for (String prefix : allPrefixes) {
@@ -642,6 +725,7 @@ public class AnalyzingSuggesterTest exte
// "Analyze" the key:
String[] tokens = prefix.split(" ");
StringBuilder builder = new StringBuilder();
+ boolean lastRemoved = false;
for(int i=0;i<tokens.length;i++) {
String token = tokens[i];
if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(""+SEP)) {
@@ -652,8 +736,10 @@ public class AnalyzingSuggesterTest exte
if (preserveSep && preserveHoles) {
builder.append(SEP);
}
+ lastRemoved = true;
} else {
builder.append(token);
+ lastRemoved = false;
}
}
@@ -676,6 +762,10 @@ public class AnalyzingSuggesterTest exte
continue;
}
+ if (preserveSep && (prefix.endsWith(" ") || lastRemoved)) {
+ analyzedKey += SEP;
+ }
+
if (VERBOSE) {
System.out.println(" analyzed: " + analyzedKey);
}
@@ -725,6 +815,9 @@ public class AnalyzingSuggesterTest exte
//System.out.println(" check hit " + hit);
assertEquals(matches.get(hit).surfaceForm.toString(), r.get(hit).key.toString());
assertEquals(matches.get(hit).weight, r.get(hit).value, 0f);
+ if (doPayloads) {
+ assertEquals(matches.get(hit).payload, r.get(hit).payload);
+ }
}
}
}
@@ -1060,4 +1153,15 @@ public class AnalyzingSuggesterTest exte
}));
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
}
+
+ public void testEndingSpace() throws Exception {
+ Analyzer a = new MockAnalyzer(random());
+ AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
+ suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+ new TermFreq("i love lucy", 7),
+ new TermFreq("isla de muerta", 8),
+ }));
+ assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
+ assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
+ }
}
Modified: lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/security/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Thu May 30 07:53:18 2013
@@ -153,8 +153,9 @@ public class FuzzySuggesterTest extends
new TermFreq("the ghost of christmas past", 50),
};
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard);
+ suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
@@ -594,6 +595,7 @@ public class FuzzySuggesterTest extends
while(true) {
key = "";
analyzedKey = "";
+ boolean lastRemoved = false;
for(int token=0;token < numTokens;token++) {
String s;
while (true) {
@@ -612,8 +614,10 @@ public class FuzzySuggesterTest extends
if (preserveSep && preserveHoles) {
analyzedKey += '\u0000';
}
+ lastRemoved = true;
} else {
analyzedKey += s;
+ lastRemoved = false;
}
break;
}
@@ -622,6 +626,10 @@ public class FuzzySuggesterTest extends
analyzedKey = analyzedKey.replaceAll("(^| )\u0000$", "");
+ if (preserveSep && lastRemoved) {
+ analyzedKey += " ";
+ }
+
// Don't add same surface form more than once:
if (!seen.contains(key)) {
seen.add(key);
@@ -669,6 +677,7 @@ public class FuzzySuggesterTest extends
// "Analyze" the key:
String[] tokens = prefix.split(" ");
StringBuilder builder = new StringBuilder();
+ boolean lastRemoved = false;
for(int i=0;i<tokens.length;i++) {
String token = tokens[i];
if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(" ")) {
@@ -679,8 +688,10 @@ public class FuzzySuggesterTest extends
if (preserveSep && preserveHoles) {
builder.append("\u0000");
}
+ lastRemoved = true;
} else {
builder.append(token);
+ lastRemoved = false;
}
}
@@ -704,6 +715,10 @@ public class FuzzySuggesterTest extends
continue;
}
+ if (preserveSep && (prefix.endsWith(" ") || lastRemoved)) {
+ analyzedKey += " ";
+ }
+
if (VERBOSE) {
System.out.println(" analyzed: " + analyzedKey);
}
Modified: lucene/dev/branches/security/lucene/test-framework/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/ivy.xml?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/ivy.xml (original)
+++ lucene/dev/branches/security/lucene/test-framework/ivy.xml Thu May 30 07:53:18 2013
@@ -32,8 +32,8 @@
<dependency org="org.apache.ant" name="ant" rev="1.8.2" transitive="false" />
<dependency org="junit" name="junit" rev="4.10" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.8" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.8" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java Thu May 30 07:53:18 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
@@ -46,7 +45,6 @@ public final class MockAnalyzer extends
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;
private final CharacterRunAutomaton filter;
- private final boolean enablePositionIncrements;
private int positionIncrementGap;
private final Random random;
private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
@@ -60,30 +58,28 @@ public final class MockAnalyzer extends
* @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
* @param lowerCase true if the tokenizer should lowercase terms
* @param filter DFA describing how terms should be filtered (set of stopwords, etc)
- * @param enablePositionIncrements true if position increments should reflect filtered terms.
*/
- public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+ public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
super(new PerFieldReuseStrategy());
// TODO: this should be solved in a different way; Random should not be shared (!).
this.random = new Random(random.nextLong());
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.filter = filter;
- this.enablePositionIncrements = enablePositionIncrements;
}
/**
- * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton)
* MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET)}.
*/
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true);
+ this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET);
}
/**
* Create a Whitespace-lowercasing analyzer with no stopwords removal.
* <p>
- * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton)
* MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET)}.
*/
public MockAnalyzer(Random random) {
@@ -95,7 +91,6 @@ public final class MockAnalyzer extends
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
tokenizer.setEnableChecks(enableChecks);
MockTokenFilter filt = new MockTokenFilter(tokenizer, filter);
- filt.setEnablePositionIncrements(enablePositionIncrements);
return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
}
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Thu May 30 07:53:18 2013
@@ -55,7 +55,6 @@ public final class MockTokenFilter exten
makeString("with"))));
private final CharacterRunAutomaton filter;
- private boolean enablePositionIncrements = true;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -80,9 +79,7 @@ public final class MockTokenFilter exten
int skippedPositions = 0;
while (input.incrementToken()) {
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
- if (enablePositionIncrements) {
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
@@ -90,20 +87,4 @@ public final class MockTokenFilter exten
// reached EOS -- return false
return false;
}
-
- /**
- * @see #setEnablePositionIncrements(boolean)
- */
- public boolean getEnablePositionIncrements() {
- return enablePositionIncrements;
- }
-
- /**
- * If <code>true</code>, this Filter will preserve
- * positions of the incoming tokens (ie, accumulate and
- * set position increments of the removed stop tokens).
- */
- public void setEnablePositionIncrements(boolean enable) {
- this.enablePositionIncrements = enable;
- }
}
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Thu May 30 07:53:18 2013
@@ -100,12 +100,21 @@ public class MockTokenizer extends Token
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
}
-
/** Calls {@link #MockTokenizer(Reader, CharacterRunAutomaton, boolean) MockTokenizer(Reader, WHITESPACE, true)} */
public MockTokenizer(Reader input) {
this(input, WHITESPACE, true);
}
-
+
+ public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+ this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
+ }
+
+ /** Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,Reader,CharacterRunAutomaton,boolean)
+ * MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */
+ public MockTokenizer(AttributeFactory factory, Reader input) {
+    this(factory, input, WHITESPACE, true);
+ }
+
@Override
public final boolean incrementToken() throws IOException {
assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT)
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java Thu May 30 07:53:18 2013
@@ -17,6 +17,10 @@ package org.apache.lucene.codecs.cheapba
* limitations under the License.
*/
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED;
+
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -37,6 +41,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+import org.apache.lucene.util.packed.PackedInts;
class CheapBastardDocValuesProducer extends DocValuesProducer {
private final Map<Integer,NumericEntry> numerics;
@@ -50,15 +55,17 @@ class CheapBastardDocValuesProducer exte
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
boolean success = false;
+ final int version;
try {
- CodecUtil.checkHeader(in, metaCodec,
- DiskDocValuesFormat.VERSION_START,
- DiskDocValuesFormat.VERSION_START);
+ version = CodecUtil.checkHeader(in, metaCodec,
+ DiskDocValuesFormat.VERSION_START,
+ DiskDocValuesFormat.VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
ords = new HashMap<Integer,NumericEntry>();
ordIndexes = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
readFields(in);
+
success = true;
} finally {
if (success) {
@@ -67,12 +74,25 @@ class CheapBastardDocValuesProducer exte
IOUtils.closeWhileHandlingException(in);
}
}
-
- String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.openInput(dataName, state.context);
- CodecUtil.checkHeader(data, dataCodec,
- DiskDocValuesFormat.VERSION_START,
- DiskDocValuesFormat.VERSION_START);
+
+ success = false;
+ try {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.openInput(dataName, state.context);
+ final int version2 = CodecUtil.checkHeader(data, dataCodec,
+ DiskDocValuesFormat.VERSION_START,
+ DiskDocValuesFormat.VERSION_CURRENT);
+ if (version != version2) {
+ throw new CorruptIndexException("Versions mismatch");
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this.data);
+ }
+ }
+
}
private void readFields(IndexInput meta) throws IOException {
@@ -140,10 +160,34 @@ class CheapBastardDocValuesProducer exte
static NumericEntry readNumericEntry(IndexInput meta) throws IOException {
NumericEntry entry = new NumericEntry();
+ entry.format = meta.readVInt();
entry.packedIntsVersion = meta.readVInt();
entry.offset = meta.readLong();
entry.count = meta.readVLong();
entry.blockSize = meta.readVInt();
+ switch(entry.format) {
+ case GCD_COMPRESSED:
+ entry.minValue = meta.readLong();
+ entry.gcd = meta.readLong();
+ break;
+ case TABLE_COMPRESSED:
+ if (entry.count > Integer.MAX_VALUE) {
+ throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
+ }
+ final int uniqueValues = meta.readVInt();
+ if (uniqueValues > 256) {
+ throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
+ }
+ entry.table = new long[uniqueValues];
+ for (int i = 0; i < uniqueValues; ++i) {
+ entry.table[i] = meta.readLong();
+ }
+ break;
+ case DELTA_COMPRESSED:
+ break;
+ default:
+ throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+ }
return entry;
}
@@ -171,13 +215,38 @@ class CheapBastardDocValuesProducer exte
final IndexInput data = this.data.clone();
data.seek(entry.offset);
- final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
- return new LongNumericDocValues() {
- @Override
- public long get(long id) {
- return reader.get(id);
- }
- };
+ switch (entry.format) {
+ case DELTA_COMPRESSED:
+ final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+ return new LongNumericDocValues() {
+ @Override
+ public long get(long id) {
+ return reader.get(id);
+ }
+ };
+ case GCD_COMPRESSED:
+ final long min = entry.minValue;
+ final long mult = entry.gcd;
+ final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+ return new LongNumericDocValues() {
+ @Override
+ public long get(long id) {
+ return min + mult * quotientReader.get(id);
+ }
+ };
+ case TABLE_COMPRESSED:
+ final long[] table = entry.table;
+ final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
+ final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
+ return new LongNumericDocValues() {
+ @Override
+ long get(long id) {
+ return table[(int) ords.get((int) id)];
+ }
+ };
+ default:
+ throw new AssertionError();
+ }
}
@Override
@@ -315,9 +384,14 @@ class CheapBastardDocValuesProducer exte
static class NumericEntry {
long offset;
+ int format;
int packedIntsVersion;
long count;
int blockSize;
+
+ long minValue;
+ long gcd;
+ long table[];
}
static class BinaryEntry {
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Thu May 30 07:53:18 2013
@@ -400,11 +400,8 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public int advance(int targetDocID) {
- do {
- nextDoc();
- } while (upto < ramTerm.docs.size() && current.docID < targetDocID);
- return NO_MORE_DOCS;
+ public int advance(int targetDocID) throws IOException {
+ return slowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
@@ -433,6 +430,11 @@ public final class RAMOnlyPostingsFormat
public int docID() {
return current.docID;
}
+
+ @Override
+ public long cost() {
+ return ramTerm.docs.size();
+ }
}
private static class RAMDocsAndPositionsEnum extends DocsAndPositionsEnum {
@@ -448,11 +450,8 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public int advance(int targetDocID) {
- do {
- nextDoc();
- } while (upto < ramTerm.docs.size() && current.docID < targetDocID);
- return NO_MORE_DOCS;
+ public int advance(int targetDocID) throws IOException {
+ return slowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
@@ -505,6 +504,11 @@ public final class RAMOnlyPostingsFormat
return null;
}
}
+
+ @Override
+ public long cost() {
+ return ramTerm.docs.size();
+ }
}
// Holds all indexes created, keyed by the ID assigned in fieldsConsumer
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java Thu May 30 07:53:18 2013
@@ -84,7 +84,7 @@ public class AssertingAtomicReader exten
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
- TermsEnum termsEnum = super.intersect(automaton, bytes);
+ TermsEnum termsEnum = in.intersect(automaton, bytes);
assert termsEnum != null;
assert bytes == null || bytes.isValid();
return new AssertingTermsEnum(termsEnum);
@@ -223,45 +223,63 @@ public class AssertingAtomicReader exten
}
static enum DocsEnumState { START, ITERATING, FINISHED };
- static class AssertingDocsEnum extends FilterDocsEnum {
+
+ /** Wraps a docsenum with additional checks */
+ public static class AssertingDocsEnum extends FilterDocsEnum {
private DocsEnumState state = DocsEnumState.START;
+ private int doc;
public AssertingDocsEnum(DocsEnum in) {
+ this(in, true);
+ }
+
+ public AssertingDocsEnum(DocsEnum in, boolean failOnUnsupportedDocID) {
super(in);
- int docid = in.docID();
- assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS : "invalid initial doc id: " + docid;
+ try {
+ int docid = in.docID();
+ assert docid == -1 : in.getClass() + ": invalid initial doc id: " + docid;
+ } catch (UnsupportedOperationException e) {
+ if (failOnUnsupportedDocID) {
+ throw e;
+ }
+ }
+ doc = -1;
}
@Override
public int nextDoc() throws IOException {
assert state != DocsEnumState.FINISHED : "nextDoc() called after NO_MORE_DOCS";
int nextDoc = super.nextDoc();
- assert nextDoc >= 0 : "invalid doc id: " + nextDoc;
+ assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc + " " + in;
if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
} else {
state = DocsEnumState.ITERATING;
}
- return nextDoc;
+ assert super.docID() == nextDoc;
+ return doc = nextDoc;
}
@Override
public int advance(int target) throws IOException {
assert state != DocsEnumState.FINISHED : "advance() called after NO_MORE_DOCS";
+ assert target > doc : "target must be > docID(), got " + target + " <= " + doc;
int advanced = super.advance(target);
- assert advanced >= 0 : "invalid doc id: " + advanced;
assert advanced >= target : "backwards advance from: " + target + " to: " + advanced;
if (advanced == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
} else {
state = DocsEnumState.ITERATING;
}
- return advanced;
+ assert super.docID() == advanced;
+ return doc = advanced;
}
- // NOTE: We don't assert anything for docId(). Specifically DocsEnum javadocs
- // are ambiguous with DocIdSetIterator here, DocIdSetIterator says its ok
- // to call this method before nextDoc(), just that it must be -1 or NO_MORE_DOCS!
+ @Override
+ public int docID() {
+ assert doc == super.docID() : " invalid docID() in " + in.getClass() + " " + super.docID() + " instead of " + doc;
+ return doc;
+ }
@Override
public int freq() throws IOException {
@@ -277,18 +295,20 @@ public class AssertingAtomicReader exten
private DocsEnumState state = DocsEnumState.START;
private int positionMax = 0;
private int positionCount = 0;
+ private int doc;
public AssertingDocsAndPositionsEnum(DocsAndPositionsEnum in) {
super(in);
int docid = in.docID();
- assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS : "invalid initial doc id: " + docid;
+ assert docid == -1 : "invalid initial doc id: " + docid;
+ doc = -1;
}
@Override
public int nextDoc() throws IOException {
assert state != DocsEnumState.FINISHED : "nextDoc() called after NO_MORE_DOCS";
int nextDoc = super.nextDoc();
- assert nextDoc >= 0 : "invalid doc id: " + nextDoc;
+ assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc;
positionCount = 0;
if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
@@ -297,14 +317,15 @@ public class AssertingAtomicReader exten
state = DocsEnumState.ITERATING;
positionMax = super.freq();
}
- return nextDoc;
+ assert super.docID() == nextDoc;
+ return doc = nextDoc;
}
@Override
public int advance(int target) throws IOException {
assert state != DocsEnumState.FINISHED : "advance() called after NO_MORE_DOCS";
+ assert target > doc : "target must be > docID(), got " + target + " <= " + doc;
int advanced = super.advance(target);
- assert advanced >= 0 : "invalid doc id: " + advanced;
assert advanced >= target : "backwards advance from: " + target + " to: " + advanced;
positionCount = 0;
if (advanced == DocIdSetIterator.NO_MORE_DOCS) {
@@ -314,7 +335,14 @@ public class AssertingAtomicReader exten
state = DocsEnumState.ITERATING;
positionMax = super.freq();
}
- return advanced;
+ assert super.docID() == advanced;
+ return doc = advanced;
+ }
+
+ @Override
+ public int docID() {
+ assert doc == super.docID() : " invalid docID() in " + in.getClass() + " " + super.docID() + " instead of " + doc;
+ return doc;
}
@Override
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java Thu May 30 07:53:18 2013
@@ -17,68 +17,29 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.List;
-
/**
* A {@link DirectoryReader} that wraps all its subreaders with
* {@link AssertingAtomicReader}
*/
-public class AssertingDirectoryReader extends DirectoryReader {
- protected DirectoryReader in;
+public class AssertingDirectoryReader extends FilterDirectoryReader {
- public AssertingDirectoryReader(DirectoryReader in) {
- super(in.directory(), wrap(in.getSequentialSubReaders()));
- this.in = in;
- }
-
- private static AtomicReader[] wrap(List<? extends AtomicReader> readers) {
- AtomicReader[] wrapped = new AtomicReader[readers.size()];
- for (int i = 0; i < readers.size(); i++) {
- wrapped[i] = new AssertingAtomicReader(readers.get(i));
+ static class AssertingSubReaderWrapper extends SubReaderWrapper {
+ @Override
+ public AtomicReader wrap(AtomicReader reader) {
+ return new AssertingAtomicReader(reader);
}
- return wrapped;
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged() throws IOException {
- DirectoryReader d = in.doOpenIfChanged();
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged(IndexCommit commit) throws IOException {
- DirectoryReader d = in.doOpenIfChanged(commit);
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws IOException {
- DirectoryReader d = in.doOpenIfChanged(writer, applyAllDeletes);
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- public long getVersion() {
- return in.getVersion();
}
- @Override
- public boolean isCurrent() throws IOException {
- return in.isCurrent();
+ public AssertingDirectoryReader(DirectoryReader in) {
+ super(in, new AssertingSubReaderWrapper());
}
@Override
- public IndexCommit getIndexCommit() throws IOException {
- return in.getIndexCommit();
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
+ return new AssertingDirectoryReader(in);
}
@Override
- protected void doClose() throws IOException {
- in.doClose();
- }
-
- @Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@@ -87,4 +48,5 @@ public class AssertingDirectoryReader ex
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
+
}
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Thu May 30 07:53:18 2013
@@ -25,15 +25,13 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
-import java.util.Map.Entry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -44,7 +42,7 @@ import org.apache.lucene.document.Sorted
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldCache;
@@ -702,6 +700,77 @@ public abstract class BaseDocValuesForma
directory.close();
}
+ public void testSortedTermsEnum() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+ iwriter.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("field", new BytesRef("world")));
+ iwriter.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("field", new BytesRef("beer")));
+ iwriter.addDocument(doc);
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+ assertEquals(3, dv.getValueCount());
+
+ TermsEnum termsEnum = dv.termsEnum();
+
+ // next()
+ assertEquals("beer", termsEnum.next().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals("world", termsEnum.next().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+
+ // seekCeil()
+ assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+ assertEquals("hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+
+ // seekExact()
+ assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+ assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+ assertEquals("world", termsEnum.term().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+ // seek(ord)
+ termsEnum.seekExact(0);
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ termsEnum.seekExact(1);
+ assertEquals("hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ termsEnum.seekExact(2);
+ assertEquals("world", termsEnum.term().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ ireader.close();
+ directory.close();
+ }
+
public void testEmptySortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
@@ -1050,8 +1119,21 @@ public abstract class BaseDocValuesForma
w.close();
dir.close();
}
-
- private void doTestNumericsVsStoredFields(long minValue, long maxValue) throws Exception {
+
+ static abstract class LongProducer {
+ abstract long next();
+ }
+
+ private void doTestNumericsVsStoredFields(final long minValue, final long maxValue) throws Exception {
+ doTestNumericsVsStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return _TestUtil.nextLong(random(), minValue, maxValue);
+ }
+ });
+ }
+
+ private void doTestNumericsVsStoredFields(LongProducer longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
@@ -1064,10 +1146,13 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
+ // numDocs should be always > 256 so that in case of a codec that optimizes
+ // for numbers of values <= 256, all storage layouts are tested
+ assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
- long value = _TestUtil.nextLong(random(), minValue, maxValue);
+ long value = longs.next();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
@@ -1082,6 +1167,11 @@ public abstract class BaseDocValuesForma
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
+
+ // merge some segments and ensure that at least one of them has more than
+ // 256 values
+ writer.forceMerge(numDocs / 256);
+
writer.close();
// compare
@@ -1146,7 +1236,7 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
@@ -1217,7 +1307,7 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
@@ -1658,13 +1748,78 @@ public abstract class BaseDocValuesForma
directory.close();
}
+ public void testSortedSetTermsEnum() throws IOException {
+ assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
+ iwriter.addDocument(doc);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+ assertEquals(3, dv.getValueCount());
+
+ TermsEnum termsEnum = dv.termsEnum();
+
+ // next()
+ assertEquals("beer", termsEnum.next().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals("world", termsEnum.next().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+
+ // seekCeil()
+ assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+ assertEquals("hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+
+ // seekExact()
+ assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+ assertEquals("hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+ assertEquals("world", termsEnum.term().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+ // seek(ord)
+ termsEnum.seekExact(0);
+ assertEquals("beer", termsEnum.term().utf8ToString());
+ assertEquals(0, termsEnum.ord());
+ termsEnum.seekExact(1);
+ assertEquals("hello", termsEnum.term().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ termsEnum.seekExact(2);
+ assertEquals("world", termsEnum.term().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ ireader.close();
+ directory.close();
+ }
+
private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
@@ -1785,7 +1940,7 @@ public abstract class BaseDocValuesForma
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
@@ -1870,4 +2025,39 @@ public abstract class BaseDocValuesForma
doTestSortedSetVsUninvertedField(1, 10);
}
}
+
+ public void testGCDCompression() throws Exception {
+ int numIterations = atLeast(1);
+ for (int i = 0; i < numIterations; i++) {
+ final long min = - (((long) random().nextInt(1 << 30)) << 32);
+ final long mul = random().nextInt() & 0xFFFFFFFFL;
+ final LongProducer longs = new LongProducer() {
+ @Override
+ long next() {
+ return min + mul * random().nextInt(1 << 20);
+ }
+ };
+ doTestNumericsVsStoredFields(longs);
+ }
+ }
+
+ public void testZeros() throws Exception {
+ doTestNumericsVsStoredFields(0, 0);
+ }
+
+ public void testZeroOrMin() throws Exception {
+ // try to make GCD compression fail if the format did not anticipate that
+ // the GCD of 0 and MIN_VALUE is negative
+ int numIterations = atLeast(1);
+ for (int i = 0; i < numIterations; i++) {
+ final LongProducer longs = new LongProducer() {
+ @Override
+ long next() {
+ return random().nextBoolean() ? 0 : Long.MIN_VALUE;
+ }
+ };
+ doTestNumericsVsStoredFields(longs);
+ }
+ }
+
}
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Thu May 30 07:53:18 2013
@@ -66,9 +66,6 @@ import org.junit.BeforeClass;
// TODO test when you reuse after skipping a term or two, eg the block reuse case
-// TODO hmm contract says .doc() can return NO_MORE_DOCS
-// before nextDoc too...?
-
/* TODO
- threads
- assert doc=-1 before any nextDoc
@@ -275,11 +272,14 @@ public abstract class BasePostingsFormat
}
@Override
- public int advance(int target) {
- while(nextDoc() < target) {
- }
- return docID;
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
}
private static class FieldAndTerm {
@@ -696,7 +696,7 @@ public abstract class BasePostingsFormat
assertNotNull("null DocsEnum", docsEnum);
int initialDocID = docsEnum.docID();
- assertTrue("inital docID should be -1 or NO_MORE_DOCS: " + docsEnum, initialDocID == -1 || initialDocID == DocsEnum.NO_MORE_DOCS);
+ assertEquals("inital docID should be -1" + docsEnum, -1, initialDocID);
if (VERBOSE) {
if (prevDocsEnum == null) {
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java Thu May 30 07:53:18 2013
@@ -520,7 +520,7 @@ public abstract class BaseTermVectorsFor
public void testRareVectors() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(10, 20);
for (Options options : validOptions()) {
- final int numDocs = _TestUtil.nextInt(random(), 10, 10000);
+ final int numDocs = atLeast(200);
final int docWithVectors = random().nextInt(numDocs);
final Document emptyDoc = new Document();
final Directory dir = newDirectory();
@@ -560,7 +560,7 @@ public abstract class BaseTermVectorsFor
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
- final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 2), _TestUtil.nextInt(random(), 50000, 100000), options);
+ final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 2), atLeast(20000), options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
@@ -575,7 +575,7 @@ public abstract class BaseTermVectorsFor
for (Options options : validOptions()) {
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
- final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 500, 1000), 5, options);
+ final RandomDocument doc = docFactory.newDocument(atLeast(100), 5, options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
@@ -614,7 +614,7 @@ public abstract class BaseTermVectorsFor
public void testRandom() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 1000);
+ final int numDocs = atLeast(100);
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), randomOptions());
@@ -636,7 +636,7 @@ public abstract class BaseTermVectorsFor
public void testMerge() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 500);
+ final int numDocs = atLeast(100);
final int numDeletes = random().nextInt(numDocs);
final Set<Integer> deletes = new HashSet<Integer>();
while (deletes.size() < numDeletes) {
@@ -645,7 +645,7 @@ public abstract class BaseTermVectorsFor
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
- docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), options);
+ docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@@ -677,11 +677,11 @@ public abstract class BaseTermVectorsFor
// don't share mutable data
public void testClone() throws IOException, InterruptedException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 1000);
+ final int numDocs = atLeast(100);
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
- docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), options);
+ docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java Thu May 30 07:53:18 2013
@@ -33,6 +33,7 @@ import org.apache.lucene.document.Stored
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
@@ -270,7 +271,7 @@ class DocHelper {
*/
public static SegmentInfoPerCommit writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
- TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
+ TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity == null ? IndexSearcher.getDefaultSimilarity() : similarity));
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.commit();
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java Thu May 30 07:53:18 2013
@@ -103,7 +103,7 @@ public class RandomIndexWriter implement
// Make sure we sometimes test indices that don't get
// any forced merges:
- doRandomForceMerge = r.nextBoolean();
+ doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
}
/**
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java Thu May 30 07:53:18 2013
@@ -17,20 +17,19 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
-import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util._TestUtil;
-/**
+/**
* Helper class that adds some extra checks to ensure correct
* usage of {@code IndexSearcher} and {@code Weight}.
- * TODO: Extend this by more checks, that's just a start.
*/
public class AssertingIndexSearcher extends IndexSearcher {
final Random random;
@@ -58,16 +57,7 @@ public class AssertingIndexSearcher exte
@Override
public Weight createNormalizedWeight(Query query) throws IOException {
final Weight w = super.createNormalizedWeight(query);
- return new Weight() {
- @Override
- public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- return w.explain(context, doc);
- }
-
- @Override
- public Query getQuery() {
- return w.getQuery();
- }
+ return new AssertingWeight(random, w) {
@Override
public void normalize(float norm, float topLevelBoost) {
@@ -75,41 +65,37 @@ public class AssertingIndexSearcher exte
}
@Override
- public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
- boolean topScorer, Bits acceptDocs) throws IOException {
- Scorer scorer = w.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
- if (scorer != null) {
- // check that scorer obeys disi contract for docID() before next()/advance
- try {
- int docid = scorer.docID();
- assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS;
- } catch (UnsupportedOperationException ignored) {
- // from a top-level BS1
- assert topScorer;
- }
- }
- return scorer;
- }
-
- @Override
public float getValueForNormalization() {
throw new IllegalStateException("Weight already normalized.");
}
- @Override
- public boolean scoresDocsOutOfOrder() {
- // TODO: if this returns false, we should wrap
- // Scorer with AssertingScorer that confirms docIDs
- // are in order?
- return w.scoresDocsOutOfOrder();
- }
};
}
-
+
+ @Override
+ public Query rewrite(Query original) throws IOException {
+ // TODO: use the more sophisticated QueryUtils.check sometimes!
+ QueryUtils.check(original);
+ Query rewritten = super.rewrite(original);
+ QueryUtils.check(rewritten);
+ return rewritten;
+ }
+
@Override
protected Query wrapFilter(Query query, Filter filter) {
if (random.nextBoolean())
return super.wrapFilter(query, filter);
return (filter == null) ? query : new FilteredQuery(query, filter, _TestUtil.randomFilterStrategy(random));
}
+
+ @Override
+ protected void search(List<AtomicReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+ super.search(leaves, AssertingWeight.wrap(random, weight), collector);
+ }
+
+ @Override
+ public String toString() {
+ return "AssertingIndexSearcher(" + super.toString() + ")";
+ }
+
}
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java Thu May 30 07:53:18 2013
@@ -59,7 +59,7 @@ public abstract class SearchEquivalenceT
directory = newDirectory();
stopword = "" + randomChar();
CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
- analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset, true);
+ analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
Document doc = new Document();
Field id = new StringField("id", "", Field.Store.NO);
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java Thu May 30 07:53:18 2013
@@ -448,7 +448,7 @@ public abstract class ShardSearchingTest
currentNodeVersions = new long[numNodes];
}
- public void initSearcher(long[] nodeVersions) {
+ public void initSearcher(long[] nodeVersions) throws IOException {
assert currentShardSearcher == null;
System.arraycopy(nodeVersions, 0, currentNodeVersions, 0, currentNodeVersions.length);
currentShardSearcher = new ShardIndexSearcher(currentNodeVersions.clone(),
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryWrapper.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryWrapper.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryWrapper.java Thu May 30 07:53:18 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.io.IOException;
import java.util.Collection;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.util._TestUtil;
/**
@@ -42,7 +43,7 @@ public class BaseDirectoryWrapper extend
@Override
public void close() throws IOException {
isOpen = false;
- if (checkIndexOnClose && indexPossiblyExists()) {
+ if (checkIndexOnClose && DirectoryReader.indexExists(this)) {
_TestUtil.checkIndex(this, crossCheckTermVectorsOnClose);
}
delegate.close();
@@ -52,27 +53,6 @@ public class BaseDirectoryWrapper extend
return isOpen;
}
- /**
- * don't rely upon DirectoryReader.fileExists to determine if we should
- * checkIndex() or not. It might mask real problems, where we silently
- * don't checkindex at all. instead we look for a segments file.
- */
- protected boolean indexPossiblyExists() {
- String files[];
- try {
- files = listAll();
- } catch (IOException ex) {
- // this means directory doesn't exist, which is ok. return false
- return false;
- }
- for (String f : files) {
- if (f.startsWith("segments_")) {
- return true;
- }
- }
- return false;
- }
-
/**
* Set whether or not checkindex should be run
* on close
Modified: lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/security/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java Thu May 30 07:53:18 2013
@@ -60,13 +60,13 @@ import org.apache.lucene.util._TestUtil;
* refusing to write/delete to open files.
* </ul>
*/
-
public class MockDirectoryWrapper extends BaseDirectoryWrapper {
long maxSize;
// Max actual bytes used. This is set by MockRAMOutputStream:
long maxUsedSize;
double randomIOExceptionRate;
+ double randomIOExceptionRateOnOpen;
Random randomState;
boolean noDeleteOpenFile = true;
boolean preventDoubleWrite = true;
@@ -158,6 +158,26 @@ public class MockDirectoryWrapper extend
this.throttling = throttling;
}
+ /**
+ * Returns true if {@link #getDelegate() delegate} must sync its files.
+ * Currently, only {@link NRTCachingDirectory} requires sync'ing its files
+ * because otherwise they are cached in an internal {@link RAMDirectory}. If
+ * other directories require that too, they should be added to this method.
+ */
+ private boolean mustSync() {
+ Directory delegate = this.delegate;
+ while (true) {
+ if (delegate instanceof RateLimitedDirectoryWrapper) {
+ delegate = ((RateLimitedDirectoryWrapper) delegate).getDelegate();
+ } else if (delegate instanceof TrackingDirectoryWrapper) {
+ delegate = ((TrackingDirectoryWrapper) delegate).getDelegate();
+ } else {
+ break;
+ }
+ }
+ return delegate instanceof NRTCachingDirectory;
+ }
+
@Override
public synchronized void sync(Collection<String> names) throws IOException {
maybeYield();
@@ -165,12 +185,16 @@ public class MockDirectoryWrapper extend
if (crashed) {
throw new IOException("cannot sync after crash");
}
- unSyncedFiles.removeAll(names);
- // TODO: need to improve hack to be OK w/
- // RateLimitingDirWrapper in between...
- if (true || LuceneTestCase.rarely(randomState) || delegate instanceof NRTCachingDirectory) {
- // don't wear out our hardware so much in tests.
- delegate.sync(names);
+ // don't wear out our hardware so much in tests.
+ if (LuceneTestCase.rarely(randomState) || mustSync()) {
+ for (String name : names) {
+ // randomly fail with IOE on any file
+ maybeThrowIOException(name);
+ delegate.sync(Collections.singleton(name));
+ unSyncedFiles.remove(name);
+ }
+ } else {
+ unSyncedFiles.removeAll(names);
}
}
@@ -322,23 +346,46 @@ public class MockDirectoryWrapper extend
public void setRandomIOExceptionRate(double rate) {
randomIOExceptionRate = rate;
}
+
public double getRandomIOExceptionRate() {
return randomIOExceptionRate;
}
- void maybeThrowIOException() throws IOException {
- maybeThrowIOException(null);
+ /**
+ * If 0.0, no exceptions will be thrown during openInput
+ * and createOutput. Else this should
+ * be a double 0.0 - 1.0 and we will randomly throw an
+ * IOException in openInput and createOutput with
+ * this probability.
+ */
+ public void setRandomIOExceptionRateOnOpen(double rate) {
+ randomIOExceptionRateOnOpen = rate;
+ }
+
+ public double getRandomIOExceptionRateOnOpen() {
+ return randomIOExceptionRateOnOpen;
}
void maybeThrowIOException(String message) throws IOException {
- if (randomIOExceptionRate > 0.0) {
- int number = Math.abs(randomState.nextInt() % 1000);
- if (number < randomIOExceptionRate*1000) {
- if (LuceneTestCase.VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
- new Throwable().printStackTrace(System.out);
- }
- throw new IOException("a random IOException" + (message == null ? "" : "(" + message + ")"));
+ if (randomState.nextDouble() < randomIOExceptionRate) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
+ new Throwable().printStackTrace(System.out);
+ }
+ throw new IOException("a random IOException" + (message == null ? "" : " (" + message + ")"));
+ }
+ }
+
+ void maybeThrowIOExceptionOnOpen(String name) throws IOException {
+ if (randomState.nextDouble() < randomIOExceptionRateOnOpen) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception during open file=" + name);
+ new Throwable().printStackTrace(System.out);
+ }
+ if (randomState.nextBoolean()) {
+ throw new IOException("a random IOException (" + name + ")");
+ } else {
+ throw new FileNotFoundException("a random IOException (" + name + ")");
}
}
}
@@ -403,22 +450,28 @@ public class MockDirectoryWrapper extend
@Override
public synchronized IndexOutput createOutput(String name, IOContext context) throws IOException {
+ maybeThrowDeterministicException();
+ maybeThrowIOExceptionOnOpen(name);
maybeYield();
if (failOnCreateOutput) {
maybeThrowDeterministicException();
}
- if (crashed)
+ if (crashed) {
throw new IOException("cannot createOutput after crash");
+ }
init();
synchronized(this) {
- if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen"))
+ if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen")) {
throw new IOException("file \"" + name + "\" was already written to");
+ }
}
- if (noDeleteOpenFile && openFiles.containsKey(name))
+ if (noDeleteOpenFile && openFiles.containsKey(name)) {
throw new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot overwrite");
+ }
- if (crashed)
+ if (crashed) {
throw new IOException("cannot createOutput after crash");
+ }
unSyncedFiles.add(name);
createdFiles.add(name);
@@ -428,9 +481,9 @@ public class MockDirectoryWrapper extend
RAMFile existing = ramdir.fileMap.get(name);
// Enforce write once:
- if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite)
+ if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite) {
throw new IOException("file " + name + " already exists");
- else {
+ } else {
if (existing!=null) {
ramdir.sizeInBytes.getAndAdd(-existing.sizeInBytes);
existing.directory = null;
@@ -452,7 +505,7 @@ public class MockDirectoryWrapper extend
if (throttling == Throttling.ALWAYS ||
(throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0) && !(delegate instanceof RateLimitedDirectoryWrapper)) {
if (LuceneTestCase.VERBOSE) {
- System.out.println("MockDirectoryWrapper: throttling indexOutput");
+ System.out.println("MockDirectoryWrapper: throttling indexOutput (" + name + ")");
}
return throttledOutput.newFromDelegate(io);
} else {
@@ -484,6 +537,8 @@ public class MockDirectoryWrapper extend
@Override
public synchronized IndexInput openInput(String name, IOContext context) throws IOException {
+ maybeThrowDeterministicException();
+ maybeThrowIOExceptionOnOpen(name);
maybeYield();
if (failOnOpenInput) {
maybeThrowDeterministicException();
@@ -587,13 +642,16 @@ public class MockDirectoryWrapper extend
if (noDeleteOpenFile && openLocks.size() > 0) {
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks);
}
+
isOpen = false;
if (getCheckIndexOnClose()) {
- if (indexPossiblyExists()) {
+ randomIOExceptionRate = 0.0;
+ randomIOExceptionRateOnOpen = 0.0;
+ if (DirectoryReader.indexExists(this)) {
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now crash");
}
- crash(); // corrumpt any unsynced-files
+ crash(); // corrupt any unsynced-files
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
}
@@ -793,7 +851,7 @@ public class MockDirectoryWrapper extend
}
}
}
-
+
@Override
public synchronized String[] listAll() throws IOException {
maybeYield();