You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/13 13:18:25 UTC
svn commit: r1102677 [4/6] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/maven/
dev-tools/maven/solr/contrib/dataimporthandler/src/extras/
dev-tools/maven/solr/src/ dev-tools/maven/solr/src/solrj/
dev-tools/scripts/ lucene/ lucen...
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Fri May 13 11:18:19 2011
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -41,10 +42,12 @@ import org.apache.lucene.search.Similari
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.Constants;
/*
Verify we can read the pre-4.0 file format, do searches
@@ -63,26 +66,27 @@ public class TestBackwardsCompatibility
// oldNames array.
/*
- public void testCreatePreLocklessCFS() throws IOException {
- createIndex("index.cfs", true);
+ public void testCreateCFS() throws IOException {
+ createIndex("index.cfs", true, false);
}
- public void testCreatePreLocklessNoCFS() throws IOException {
- createIndex("index.nocfs", false);
+ public void testCreateNoCFS() throws IOException {
+ createIndex("index.nocfs", false, false);
}
*/
-
+
/*
- public void testCreateCFS() throws IOException {
- String dirName = "testindex.cfs";
- createIndex(dirName, true);
- rmDir(dirName);
+ // These are only needed for the special upgrade test to verify
+ // that also optimized indexes are correctly upgraded by IndexUpgrader.
+ // You don't need them to be built for non-3.1 (the test is happy with just one
+ // "old" segment format; version is unimportant):
+
+ public void testCreateOptimizedCFS() throws IOException {
+ createIndex("index.optimized.cfs", true, true);
}
- public void testCreateNoCFS() throws IOException {
- String dirName = "testindex.nocfs";
- createIndex(dirName, true);
- rmDir(dirName);
+ public void testCreateOptimizedNoCFS() throws IOException {
+ createIndex("index.optimized.nocfs", false, true);
}
*/
@@ -90,6 +94,8 @@ public class TestBackwardsCompatibility
"30.nocfs",
"31.cfs",
"31.nocfs",
+ "32.cfs",
+ "32.nocfs",
};
final String[] unsupportedNames = {"19.cfs",
@@ -108,6 +114,10 @@ public class TestBackwardsCompatibility
"29.nocfs",
};
+ final String[] oldOptimizedNames = {"31.optimized.cfs",
+ "31.optimized.nocfs",
+ };
+
/** This test checks that *only* IndexFormatTooOldExceptions are throws when you open and operate on too old indexes! */
public void testUnsupportedOldIndexes() throws Exception {
for(int i=0;i<unsupportedNames.length;i++) {
@@ -445,9 +455,9 @@ public class TestBackwardsCompatibility
dir.close();
}
- public File createIndex(Random random, String dirName, boolean doCFS) throws IOException {
-
- File indexDir = _TestUtil.getTempDir(dirName);
+ public File createIndex(String dirName, boolean doCFS, boolean optimized) throws IOException {
+ // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
+ File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
@@ -459,24 +469,30 @@ public class TestBackwardsCompatibility
addDoc(writer, i);
}
assertEquals("wrong doc count", 35, writer.maxDoc());
+ if (optimized) {
+ writer.optimize();
+ }
writer.close();
- // open fresh writer so we get no prx file in the added segment
- conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
- ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
- writer = new IndexWriter(dir, conf);
- addNoProxDoc(writer);
- writer.close();
+ if (!optimized) {
+ // open fresh writer so we get no prx file in the added segment
+ conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
+ ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
+ writer = new IndexWriter(dir, conf);
+ addNoProxDoc(writer);
+ writer.close();
- // Delete one doc so we get a .del file:
- IndexReader reader = IndexReader.open(dir, false);
- Term searchTerm = new Term("id", "7");
- int delCount = reader.deleteDocuments(searchTerm);
- assertEquals("didn't delete the right number of documents", 1, delCount);
+ // Delete one doc so we get a .del file:
+ IndexReader reader = IndexReader.open(dir, false);
+ Term searchTerm = new Term("id", "7");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("didn't delete the right number of documents", 1, delCount);
- // Set one norm so we get a .s0 file:
- reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f));
- reader.close();
+ // Set one norm so we get a .s0 file:
+ reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f));
+ reader.close();
+ }
+
dir.close();
return indexDir;
@@ -692,5 +708,98 @@ public class TestBackwardsCompatibility
_TestUtil.rmDir(oldIndexDir);
}
}
+
+ private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
+ final SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ if (VERBOSE) {
+ System.out.println("checkAllSegmentsUpgraded: " + infos);
+ }
+ for (SegmentInfo si : infos) {
+ assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
+ }
+ return infos.size();
+ }
+
+ private int getNumberOfSegments(Directory dir) throws IOException {
+ final SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ return infos.size();
+ }
+
+ public void testUpgradeOldIndex() throws Exception {
+ List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length);
+ names.addAll(Arrays.asList(oldNames));
+ names.addAll(Arrays.asList(oldOptimizedNames));
+ for(String name : names) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldIndex: index=" +name);
+ }
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ checkAllSegmentsUpgraded(dir);
+
+ _TestUtil.checkIndex(dir);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
+
+ public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
+ for (String name : oldOptimizedNames) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
+ }
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
+
+ // create a bunch of dummy segments
+ int id = 40;
+ RAMDirectory ramDir = new RAMDirectory();
+ for (int i = 0; i < 3; i++) {
+ // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(ramDir, iwc);
+ // add few more docs:
+ for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
+ addDoc(w, id++);
+ }
+ w.close(false);
+ }
+
+ // add dummy segments (which are all in current version) to optimized index
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.setInfoStream(VERBOSE ? System.out : null);
+ w.addIndexes(ramDir);
+ w.close(false);
+
+ // determine count of segments in modified index
+ final int origSegCount = getNumberOfSegments(dir);
+
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ final int segCount = checkAllSegmentsUpgraded(dir);
+ assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+ origSegCount, segCount);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestCodecs.java Fri May 13 11:18:19 2011
@@ -241,8 +241,7 @@ public class TestCodecs extends LuceneTe
final Directory dir = newDirectory();
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, true);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
- si.setHasProx(false);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos);
final FieldsProducer reader = si.getSegmentCodecs().codec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 64, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
@@ -294,7 +293,7 @@ public class TestCodecs extends LuceneTe
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, false);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos);
if (VERBOSE) {
System.out.println("TEST: now read postings");
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestDoc.java Fri May 13 11:18:19 2011
@@ -196,7 +196,7 @@ public class TestDoc extends LuceneTestC
SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault(), null, new FieldInfos());
+ SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, null, new FieldInfos());
merger.add(r1);
merger.add(r2);
@@ -205,8 +205,7 @@ public class TestDoc extends LuceneTestC
r2.close();
final FieldInfos fieldInfos = merger.fieldInfos();
final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir,
- false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
- fieldInfos.hasVectors(), fieldInfos);
+ false, merger.getSegmentCodecs(), fieldInfos);
if (useCompoundFile) {
Collection<String> filesToDelete = merger.createCompoundFile(merged + ".cfs", info);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java Fri May 13 11:18:19 2011
@@ -24,12 +24,14 @@ import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
@@ -511,4 +513,69 @@ public class TestFieldsReader extends Lu
}
}
+
+ public void testNumericField() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random, dir);
+ final int numDocs = _TestUtil.nextInt(random, 500, 1000) * RANDOM_MULTIPLIER;
+ final Number[] answers = new Number[numDocs];
+ final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs];
+ for(int id=0;id<numDocs;id++) {
+ Document doc = new Document();
+ NumericField nf = new NumericField("nf", Field.Store.YES, false);
+ doc.add(nf);
+ final Number answer;
+ final NumericField.DataType typeAnswer;
+ if (random.nextBoolean()) {
+ // float/double
+ if (random.nextBoolean()) {
+ final float f = random.nextFloat();
+ nf.setFloatValue(f);
+ answer = Float.valueOf(f);
+ typeAnswer = NumericField.DataType.FLOAT;
+ } else {
+ final double d = random.nextDouble();
+ nf.setDoubleValue(d);
+ answer = Double.valueOf(d);
+ typeAnswer = NumericField.DataType.DOUBLE;
+ }
+ } else {
+ // int/long
+ if (random.nextBoolean()) {
+ final int i = random.nextInt();
+ nf.setIntValue(i);
+ answer = Integer.valueOf(i);
+ typeAnswer = NumericField.DataType.INT;
+ } else {
+ final long l = random.nextLong();
+ nf.setLongValue(l);
+ answer = Long.valueOf(l);
+ typeAnswer = NumericField.DataType.LONG;
+ }
+ }
+ answers[id] = answer;
+ typeAnswers[id] = typeAnswer;
+ doc.add(new NumericField("id", Integer.MAX_VALUE, Field.Store.NO, true).setIntValue(id));
+ w.addDocument(doc);
+ }
+ final IndexReader r = w.getReader();
+ w.close();
+
+ assertEquals(numDocs, r.numDocs());
+
+ for(IndexReader sub : r.getSequentialSubReaders()) {
+ final int[] ids = FieldCache.DEFAULT.getInts(sub, "id");
+ for(int docID=0;docID<sub.numDocs();docID++) {
+ final Document doc = sub.document(docID);
+ final Fieldable f = doc.getFieldable("nf");
+ assertTrue("got f=" + f, f instanceof NumericField);
+ final NumericField nf = (NumericField) f;
+ assertEquals(answers[ids[docID]], nf.getNumericValue());
+ assertSame(typeAnswers[ids[docID]], nf.getDataType());
+ }
+ }
+ r.close();
+ dir.close();
+ }
+
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java Fri May 13 11:18:19 2011
@@ -233,8 +233,7 @@ public class TestFlushByRamOrCountsPolic
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
MockDirectoryWrapper dir = newDirectory();
// mock a very slow harddisk here so that flushing is very slow
- dir.setThrottledIndexOutput(new ThrottledIndexOutput(ThrottledIndexOutput
- .mBitsToBytes(40 + random.nextInt(10)), 5 + random.nextInt(5), null));
+ dir.setThrottling(MockDirectoryWrapper.Throttling.ALWAYS);
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Fri May 13 11:18:19 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.ByteArrayOutputStream;
-import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
@@ -1130,10 +1129,12 @@ public class TestIndexWriter extends Luc
while(true) {
MergePolicy.OneMerge merge = writer.getNextMerge();
- if (merge == null)
+ if (merge == null) {
break;
- for(int i=0;i<merge.segments.size();i++)
- assert merge.segments.info(i).docCount < 20;
+ }
+ for(int i=0;i<merge.segments.size();i++) {
+ assert merge.segments.get(i).docCount < 20;
+ }
writer.merge(merge);
}
}
@@ -1603,7 +1604,7 @@ public class TestIndexWriter extends Luc
// LUCENE-510
public void testInvalidUTF16() throws Throwable {
Directory dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new StringSplitAnalyzer()));
Document doc = new Document();
final int count = utf8Data.length/2;
@@ -2141,6 +2142,7 @@ public class TestIndexWriter extends Luc
while(true) {
if (w != null) {
w.close();
+ w = null;
}
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2);
@@ -2186,10 +2188,12 @@ public class TestIndexWriter extends Luc
if (!failed) {
// clear interrupt state:
Thread.interrupted();
- try {
- w.rollback();
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
+ if (w != null) {
+ try {
+ w.rollback();
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
}
try {
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Fri May 13 11:18:19 2011
@@ -33,9 +33,7 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.store.MockDirectoryWrapper.Failure;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -155,7 +153,9 @@ public class TestIndexWriterExceptions e
}
MockDirectoryWrapper dir = newDirectory();
- MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ MockAnalyzer analyzer = new MockAnalyzer(random);
+ analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
.setRAMBufferSizeMB(0.1).setMergeScheduler(new ConcurrentMergeScheduler()));
((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();
//writer.setMaxBufferedDocs(10);
@@ -201,7 +201,9 @@ public class TestIndexWriterExceptions e
public void testRandomExceptionsThreads() throws Throwable {
MockDirectoryWrapper dir = newDirectory();
- MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ MockAnalyzer analyzer = new MockAnalyzer(random);
+ analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
.setRAMBufferSizeMB(0.2).setMergeScheduler(new ConcurrentMergeScheduler()));
((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();
//writer.setMaxBufferedDocs(10);
@@ -321,7 +323,9 @@ public class TestIndexWriterExceptions e
Analyzer analyzer = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ return new CrashingFilter(fieldName, tokenizer);
}
};
@@ -390,7 +394,9 @@ public class TestIndexWriterExceptions e
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new TokenFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)) {
+ MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ return new TokenFilter(tokenizer) {
private int count = 0;
@Override
@@ -522,7 +528,9 @@ public class TestIndexWriterExceptions e
Analyzer analyzer = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ return new CrashingFilter(fieldName, tokenizer);
}
};
@@ -621,7 +629,9 @@ public class TestIndexWriterExceptions e
Analyzer analyzer = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
+ return new CrashingFilter(fieldName, tokenizer);
}
};
@@ -1167,4 +1177,105 @@ public class TestIndexWriterExceptions e
writer.close();
dir.close();
}
+
+ public void testTermVectorExceptions() throws IOException {
+ FailOnTermVectors[] failures = new FailOnTermVectors[] {
+ new FailOnTermVectors(FailOnTermVectors.AFTER_INIT_STAGE),
+ new FailOnTermVectors(FailOnTermVectors.INIT_STAGE), };
+ for (int j = 0; j < 3 * RANDOM_MULTIPLIER; j++) {
+ for (FailOnTermVectors failure : failures) {
+ MockDirectoryWrapper dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ dir.failOn(failure);
+ int numDocs = 10 + random.nextInt(30);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ Field field = newField(random, "field", "a field", Field.Store.YES,
+ Field.Index.ANALYZED);
+ doc.add(field);
+ // random TV
+ try {
+ w.addDocument(doc);
+ assertFalse(field.isTermVectorStored());
+ } catch (RuntimeException e) {
+ assertTrue(e.getMessage().startsWith(FailOnTermVectors.EXC_MSG));
+ }
+ if (random.nextInt(20) == 0) {
+ w.commit();
+ _TestUtil.checkIndex(dir);
+ }
+
+ }
+ Document document = new Document();
+ document.add(new Field("field", "a field", Field.Store.YES,
+ Field.Index.ANALYZED));
+ w.addDocument(document);
+
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ Field field = newField(random, "field", "a field", Field.Store.YES,
+ Field.Index.ANALYZED);
+ doc.add(field);
+ // random TV
+ try {
+ w.addDocument(doc);
+ assertFalse(field.isTermVectorStored());
+ } catch (RuntimeException e) {
+ assertTrue(e.getMessage().startsWith(FailOnTermVectors.EXC_MSG));
+ }
+ if (random.nextInt(20) == 0) {
+ w.commit();
+ _TestUtil.checkIndex(dir);
+ }
+ }
+ document = new Document();
+ document.add(new Field("field", "a field", Field.Store.YES,
+ Field.Index.ANALYZED));
+ w.addDocument(document);
+ w.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertTrue(reader.numDocs() > 0);
+ reader.close();
+ SegmentInfos sis = new SegmentInfos();
+ sis.read(dir);
+ for (SegmentInfo segmentInfo : sis) {
+ assertFalse(segmentInfo.getHasVectors());
+ }
+ dir.close();
+
+ }
+ }
+ }
+
+ private static class FailOnTermVectors extends MockDirectoryWrapper.Failure {
+
+ private static final String INIT_STAGE = "initTermVectorsWriter";
+ private static final String AFTER_INIT_STAGE = "finishDocument";
+ private static final String EXC_MSG = "FOTV";
+ private final String stage;
+
+ public FailOnTermVectors(String stage) {
+ this.stage = stage;
+ }
+
+ @Override
+ public void eval(MockDirectoryWrapper dir) throws IOException {
+ StackTraceElement[] trace = new Exception().getStackTrace();
+ boolean failOnInit = false;
+ boolean failOnfinish = false;
+ for (int i = 0; i < trace.length; i++) {
+ if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ failOnInit = true;
+ if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ failOnfinish = true;
+ }
+
+ if (failOnInit) {
+ throw new RuntimeException(EXC_MSG + " fail on init");
+ } else if (failOnfinish) {
+ throw new RuntimeException(EXC_MSG + " fail on finishDoc");
+ }
+ }
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestLongPostings.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestLongPostings.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestLongPostings.java Fri May 13 11:18:19 2011
@@ -49,6 +49,7 @@ public class TestLongPostings extends Lu
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
final BytesRef termBytes = termAtt.getBytesRef();
int count = 0;
+ ts.reset();
while(ts.incrementToken()) {
termAtt.fillBytesRef();
if (count == 0 && !termBytes.utf8ToString().equals(s)) {
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Fri May 13 11:18:19 2011
@@ -73,15 +73,15 @@ public class TestSegmentMerger extends L
}
public void testMerge() throws IOException {
- SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos());
+ SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, null, new FieldInfos());
merger.add(reader1);
merger.add(reader2);
int docsMerged = merger.merge();
assertTrue(docsMerged == 2);
final FieldInfos fieldInfos = merger.fieldInfos();
//Should be able to open a new SegmentReader against the new directory
- SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, fieldInfos.hasProx(),
- merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos),
+ SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false,
+ merger.getSegmentCodecs(), fieldInfos),
BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Fri May 13 11:18:19 2011
@@ -616,7 +616,7 @@ public class TestStressIndexing2 extends
}
for(int i=start;i<end;i++) {
- int t = nextInt(6);
+ int t = nextInt(5);
if (0 == t && i < end-1) {
// Make a surrogate pair
// High surrogate
@@ -631,13 +631,6 @@ public class TestStressIndexing2 extends
buffer[i] = (char) nextInt(0x800, 0xd800);
else if (4 == t)
buffer[i] = (char) nextInt(0xe000, 0xffff);
- else if (5 == t) {
- // Illegal unpaired surrogate
- if (r.nextBoolean())
- buffer[i] = (char) nextInt(0xd800, 0xdc00);
- else
- buffer[i] = (char) nextInt(0xdc00, 0xe000);
- }
}
buffer[end] = ' ';
return 1+end;
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java Fri May 13 11:18:19 2011
@@ -121,7 +121,9 @@ public class TestTermVectorsWriter exten
Analyzer analyzer = new MockAnalyzer(random);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
- TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd ")));
+ TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd "));
+ stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
+ stream = new CachingTokenFilter(stream);
Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(f);
doc.add(f);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java Fri May 13 11:18:19 2011
@@ -25,14 +25,22 @@ import org.apache.lucene.index.MultiFiel
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
+import java.io.Reader;
/**
* This class tests the MultiPhraseQuery class.
@@ -333,4 +341,97 @@ public class TestMultiPhraseQuery extend
reader.close();
indexStore.close();
}
+
+ private static class TokenAndPos {
+ public final String token;
+ public final int pos;
+ public TokenAndPos(String token, int pos) {
+ this.token = token;
+ this.pos = pos;
+ }
+ }
+
+ private static class CannedAnalyzer extends Analyzer {
+ private final TokenAndPos[] tokens;
+
+ public CannedAnalyzer(TokenAndPos[] tokens) {
+ this.tokens = tokens;
+ }
+
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new CannedTokenizer(tokens);
+ }
+ }
+
+ private static class CannedTokenizer extends Tokenizer {
+ private final TokenAndPos[] tokens;
+ private int upto = 0;
+ private int lastPos = 0;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+
+ public CannedTokenizer(TokenAndPos[] tokens) {
+ this.tokens = tokens;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ if (upto < tokens.length) {
+ final TokenAndPos token = tokens[upto++];
+ termAtt.setEmpty();
+ termAtt.append(token.token);
+ posIncrAtt.setPositionIncrement(token.pos - lastPos);
+ lastPos = token.pos;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ public void testZeroPosIncr() throws IOException {
+ Directory dir = new RAMDirectory();
+ final TokenAndPos[] tokens = new TokenAndPos[3];
+ tokens[0] = new TokenAndPos("a", 0);
+ tokens[1] = new TokenAndPos("b", 0);
+ tokens[2] = new TokenAndPos("c", 0);
+
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir, new CannedAnalyzer(tokens));
+ Document doc = new Document();
+ doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.addDocument(doc);
+ IndexReader r = writer.getReader();
+ writer.close();
+ IndexSearcher s = new IndexSearcher(r);
+ MultiPhraseQuery mpq = new MultiPhraseQuery();
+ //mpq.setSlop(1);
+
+ // NOTE: not great that if we do the else clause here we
+ // get different scores! MultiPhraseQuery counts that
+ // phrase as occurring twice per doc (it should be 1, I
+ // think?). This is because MultipleTermPositions is able to
+ // return the same position more than once (0, in this
+ // case):
+ if (true) {
+ mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
+ mpq.add(new Term[] {new Term("field", "a")}, 0);
+ } else {
+ mpq.add(new Term[] {new Term("field", "a")}, 0);
+ mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
+ }
+ TopDocs hits = s.search(mpq, 2);
+ assert hits.totalHits == 2;
+ assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
+ /*
+ for(int hit=0;hit<hits.totalHits;hit++) {
+ ScoreDoc sd = hits.scoreDocs[hit];
+ System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
+ }
+ */
+ r.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java Fri May 13 11:18:19 2011
@@ -626,11 +626,14 @@ public class TestPhraseQuery extends Luc
}
TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
while(ts.incrementToken()) {
String text = termAttr.toString();
doc.add(text);
sb.append(text).append(' ');
}
+ ts.end();
+ ts.close();
} else {
// pick existing sub-phrase
List<String> lastDoc = docs.get(r.nextInt(docs.size()));
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java Fri May 13 11:18:19 2011
@@ -75,7 +75,7 @@ public class TestTimeLimitingCollector e
"blueberry pizza",
};
directory = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random, directory);
+ RandomIndexWriter iw = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
for (int i=0; i<N_DOCS; i++) {
add(docText[i%docText.length], iw);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java Fri May 13 11:18:19 2011
@@ -144,6 +144,24 @@ public class TestArrayUtil extends Lucen
}
}
+ private Integer[] createSparseRandomArray(int maxSize) {
+ final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = Integer.valueOf(random.nextInt(2));
+ }
+ return a;
+ }
+
+ // This is a test for LUCENE-3054 (which fails without the merge sort fall back with stack overflow in most cases)
+ public void testQuickToMergeSortFallback() {
+ for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
+ Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
+ ArrayUtil.quickSort(a1);
+ Arrays.sort(a2);
+ assertArrayEquals(a2, a1);
+ }
+ }
+
public void testMergeSort() {
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java Fri May 13 11:18:19 2011
@@ -135,6 +135,7 @@ public class TestBytesRefHash extends Lu
public void testCompact() {
BytesRef ref = new BytesRef();
for (int j = 0; j < 2 * RANDOM_MULTIPLIER; j++) {
+ int numEntries = 0;
final int size = 797;
BitSet bits = new BitSet(size);
for (int i = 0; i < size; i++) {
@@ -143,13 +144,21 @@ public class TestBytesRefHash extends Lu
str = _TestUtil.randomRealisticUnicodeString(random, 1000);
} while (str.length() == 0);
ref.copy(str);
- bits.set(hash.add(ref));
-
+ final int key = hash.add(ref);
+ if (key < 0) {
+ assertTrue(bits.get((-key)-1));
+ } else {
+ assertFalse(bits.get(key));
+ bits.set(key);
+ numEntries++;
+ }
}
assertEquals(hash.size(), bits.cardinality());
+ assertEquals(numEntries, bits.cardinality());
+ assertEquals(numEntries, hash.size());
int[] compact = hash.compact();
- assertTrue(size < compact.length);
- for (int i = 0; i < size; i++) {
+ assertTrue(numEntries < compact.length);
+ for (int i = 0; i < numEntries; i++) {
bits.set(compact[i], false);
}
assertEquals(0, bits.cardinality());
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Fri May 13 11:18:19 2011
@@ -288,6 +288,36 @@ public class TestFSTs extends LuceneTest
}
new FSTTester<IntsRef>(random, dir, inputMode, pairs, outputs).doTest();
}
+
+ // Up to two positive ints, shared, generally but not
+ // monotonically increasing
+ {
+ if (VERBOSE) {
+ System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
+ }
+ final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
+ final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
+ long lastOutput = 0;
+ for(int idx=0;idx<terms.length;idx++) {
+ // Sometimes go backwards
+ long value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ while(value < 0) {
+ value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ }
+ final Object output;
+ if (random.nextInt(5) == 3) {
+ long value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ while(value2 < 0) {
+ value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ }
+ output = outputs.get(value, value2);
+ } else {
+ output = outputs.get(value);
+ }
+ pairs.add(new FSTTester.InputOutput<Object>(terms[idx], output));
+ }
+ new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
+ }
}
private static class FSTTester<T> {
@@ -328,11 +358,13 @@ public class TestFSTs extends LuceneTest
// no pruning
doTest(0, 0);
- // simple pruning
- doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
-
- // leafy pruning
- doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
+ // simple pruning
+ doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
+
+ // leafy pruning
+ doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ }
}
// runs the term, returning the output, or null if term
@@ -421,7 +453,14 @@ public class TestFSTs extends LuceneTest
prune1==0 && prune2==0, outputs);
for(InputOutput<T> pair : pairs) {
- builder.add(pair.input, pair.output);
+ if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
+ final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
+ final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
+ ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first));
+ ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second));
+ } else {
+ builder.add(pair.input, pair.output);
+ }
}
FST<T> fst = builder.finish();
Modified: lucene/dev/branches/docvalues/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/CHANGES.txt?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/modules/analysis/CHANGES.txt Fri May 13 11:18:19 2011
@@ -83,6 +83,8 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
/something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java Fri May 13 11:18:19 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Fri May 13 11:18:19 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Fri May 13 11:18:19 2011
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Fri May 13 11:18:19 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Fri May 13 11:18:19 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Fri May 13 11:18:19 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java Fri May 13 11:18:19 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Fri May 13 11:18:19 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Fri May 13 11:18:19 2011
@@ -31,8 +31,6 @@ import org.apache.lucene.util.Version;
/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
* tokenized as "avion" (plane).
- * <p>
- * Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out.
*
* @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
*/
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java Fri May 13 11:18:19 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Fri May 13 11:18:19 2011
@@ -19,11 +19,13 @@ package org.apache.lucene.analysis.it;
import java.io.IOException;
import java.io.Reader;
+import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -38,6 +40,14 @@ import org.tartarus.snowball.ext.Italian
/**
* {@link Analyzer} for Italian.
+ * <p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ItalianAnalyzer:
+ * <ul>
+ * <li> As of 3.2, ElisionFilter with a set of Italian
+ * contractions is used by default.
+ * </ul>
*/
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
@@ -45,6 +55,13 @@ public final class ItalianAnalyzer exten
/** File containing default Italian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+ private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+ new CharArraySet(Version.LUCENE_CURRENT,
+ Arrays.asList(
+ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
+ "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
+ ), true));
+
/**
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
@@ -112,7 +129,7 @@ public final class ItalianAnalyzer exten
* @return A
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
- * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+ * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@@ -121,6 +138,9 @@ public final class ItalianAnalyzer exten
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
+ if (matchVersion.onOrAfter(Version.LUCENE_32)) {
+ result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+ }
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Fri May 13 11:18:19 2011
@@ -25,57 +25,71 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
- *
+ *
* Take something like:
- *
+ *
* <pre>
- * /soemthing/something/else
+ * /something/something/else
* </pre>
- *
+ *
* and make:
- *
+ *
* <pre>
- * /soemthing
- * /soemthing/something
- * /soemthing/something/else
+ * /something
+ * /something/something
+ * /something/something/else
* </pre>
- *
*/
public class PathHierarchyTokenizer extends Tokenizer {
public PathHierarchyTokenizer(Reader input) {
- this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
}
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
- this(input, bufferSize, delimiter, delimiter);
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
}
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
- this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
}
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
super(input);
termAtt.resizeBuffer(bufferSize);
+
this.delimiter = delimiter;
this.replacement = replacement;
- endDelimiter = false;
+ this.skip = skip;
resultToken = new StringBuilder(bufferSize);
}
-
+
private static final int DEFAULT_BUFFER_SIZE = 1024;
public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
private final char delimiter;
private final char replacement;
-
+ private final int skip;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+ private int startPosition = 0;
private int finalOffset = 0;
- private boolean endDelimiter;
+ private int skipped = 0;
+ private boolean endDelimiter = false;
private StringBuilder resultToken;
+
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
@@ -97,43 +111,69 @@ public class PathHierarchyTokenizer exte
while (true) {
int c = input.read();
- if( c < 0 ) {
- length += resultToken.length();
- termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
- if( added ){
- resultToken.setLength(0);
- resultToken.append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- added = true;
- if( c == delimiter ) {
- if( length > 0 ){
- endDelimiter = true;
- break;
+ if( c < 0 ){
+ if( skipped > skip ) {
+ length += resultToken.length();
+ termAtt.setLength(length);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+ if( added ){
+ resultToken.setLength(0);
+ resultToken.append(termAtt.buffer(), 0, length);
+ }
+ return added;
}
else{
- termAtt.append(replacement);
+ finalOffset = correctOffset(startPosition + length);
+ return false;
+ }
+ }
+ if( !added ){
+ added = true;
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
+ else {
+ startPosition++;
+ }
}
else {
- termAtt.append((char)c);
- length++;
+ if( c == delimiter ){
+ if( skipped > skip ){
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(replacement);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
+ else {
+ if( skipped > skip ){
+ termAtt.append((char)c);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
}
}
-
length += resultToken.length();
termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
-
+
@Override
public final void end() {
// set final offset
@@ -146,5 +186,6 @@ public class PathHierarchyTokenizer exte
resultToken.setLength(0);
finalOffset = 0;
endDelimiter = false;
+ skipped = 0;
}
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java Fri May 13 11:18:19 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Fri May 13 11:18:19 2011
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Fri May 13 11:18:19 2011
@@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -55,4 +56,18 @@ public class TestItalianAnalyzer extends
public void testRandomStrings() throws Exception {
checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** test that the elision filter is working */
+ public void testContractions() throws IOException {
+ Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "ital" });
+ }
+
+ /** test that we don't enable this before 3.2 */
+ public void testContractionsBackwards() throws IOException {
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "dell'ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "l'ital" });
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Fri May 13 11:18:19 2011
@@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer
new int[]{1, 0, 0, 0},
path.length());
}
+
+ public void testBasicSkip() throws Exception {
+ String path = "/a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{2, 2},
+ new int[]{4, 6},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testEndOfDelimiterSkip() throws Exception {
+ String path = "/a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{2, 2, 2},
+ new int[]{4, 6, 7},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharSkip() throws Exception {
+ String path = "a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{1, 1},
+ new int[]{3, 5},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testStartOfCharEndOfDelimiterSkip() throws Exception {
+ String path = "a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{1, 1, 1},
+ new int[]{3, 5, 6},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testOnlyDelimiterSkip() throws Exception {
+ String path = "/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{},
+ new int[]{},
+ new int[]{},
+ new int[]{},
+ path.length());
+ }
+
+ public void testOnlyDelimitersSkip() throws Exception {
+ String path = "//";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/"},
+ new int[]{1},
+ new int[]{2},
+ new int[]{1},
+ path.length());
+ }
}
Modified: lucene/dev/branches/docvalues/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/CHANGES.txt?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/solr/CHANGES.txt Fri May 13 11:18:19 2011
@@ -198,6 +198,9 @@ Bug Fixes
initialization if the schema.xml contains an analyzer configuration
for a fieldType that does not use TextField. (hossman)
+* SOLR-2467: Fix <analyzer class="..." /> initialization so any errors
+ are logged properly. (hossman)
+
Other Changes
----------------------
@@ -267,6 +270,12 @@ Detailed Change List
New Features
----------------------
+* SOLR-2496: Add ability to specify overwrite and commitWithin as request
+ parameters (e.g. specified in the URL) when using the JSON update format,
+ and added a simplified format for specifying multiple documents.
+ Example: [{"id":"doc1"},{"id":"doc2"}]
+ (yonik)
+
Optimizations
----------------------
@@ -309,6 +318,13 @@ Bug Fixes
did not clear all attributes so they displayed incorrect attribute values for tokens
in later filter stages. (uschindler, rmuir, yonik)
+* SOLR-2493: SolrQueryParser was fixed to not parse the SolrConfig DOM tree on each
+ instantiation which is a huge slowdown. (Stephane Bailliez via uschindler)
+
+* SOLR-2495: The JSON parser could hang on corrupted input and could fail
+ to detect numbers that were too large to fit in a long. (yonik)
+
+
Other Changes
----------------------
Modified: lucene/dev/branches/docvalues/solr/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/README.txt?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/README.txt (original)
+++ lucene/dev/branches/docvalues/solr/README.txt Fri May 13 11:18:19 2011
@@ -64,18 +64,18 @@ docs/api/index.html
Instructions for Building Apache Solr from Source
-------------------------------------------------
-1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com.
- You will need the JDK installed, and the %JAVA_HOME%\bin directory included
- on your command path. To test this, issue a "java -version" command from your
- shell and verify that the Java version is 1.6 or later.
-
-2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org.
- You will need Ant installed and the %ANT_HOME%\bin directory included on your
- command path. To test this, issue a "ant -version" command from your
- shell and verify that Ant is available.
+1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com/
+ You will need the JDK installed, and the $JAVA_HOME/bin (Windows: %JAVA_HOME%\bin)
+ folder included on your command path. To test this, issue a "java -version" command
+ from your shell (command prompt) and verify that the Java version is 1.6 or later.
+
+2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org/
+ You will need Ant installed and the $ANT_HOME/bin (Windows: %ANT_HOME%\bin) folder
+ included on your command path. To test this, issue a "ant -version" command from your
+ shell (command prompt) and verify that Ant is available.
-3. Download the Apache Solr distribution, linked from the above
- web site. Expand the distribution to a folder of your choice, e.g. c:\solr.
+3. Download the Apache Solr distribution, linked from the above web site.
+ Unzip the distribution to a folder of your choice, e.g. C:\solr or ~/solr
Alternately, you can obtain a copy of the latest Apache Solr source code
directly from the Subversion repository:
Modified: lucene/dev/branches/docvalues/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/build.xml (original)
+++ lucene/dev/branches/docvalues/solr/build.xml Fri May 13 11:18:19 2011
@@ -450,6 +450,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
@@ -1020,7 +1021,7 @@
jar.file="lib/commons-csv-1.0-SNAPSHOT-r966014.jar" />
<m2-deploy-with-pom-template pom.xml="lib/apache-solr-noggit-pom.xml.template"
- jar.file="lib/apache-solr-noggit-r944541.jar" />
+ jar.file="lib/apache-solr-noggit-r1099557.jar" />
<m2-deploy-with-pom-template pom.xml="contrib/uima/lib/solr-uima-an-alchemy-pom.xml.template"
jar.file="contrib/uima/lib/uima-an-alchemy-2.3.1-SNAPSHOT-r1062868.jar" />
Modified: lucene/dev/branches/docvalues/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/common-build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/common-build.xml (original)
+++ lucene/dev/branches/docvalues/solr/common-build.xml Fri May 13 11:18:19 2011
@@ -61,6 +61,7 @@
</condition>
<property name="tests.multiplier" value="1" />
+ <property name="tests.codecprovider" value="random" />
<property name="tests.codec" value="randomPerField" />
<property name="tests.locale" value="random" />
<property name="tests.timezone" value="random" />
Modified: lucene/dev/branches/docvalues/solr/contrib/analysis-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/analysis-extras/build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/analysis-extras/build.xml (original)
+++ lucene/dev/branches/docvalues/solr/contrib/analysis-extras/build.xml Fri May 13 11:18:19 2011
@@ -146,6 +146,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
Modified: lucene/dev/branches/docvalues/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/clustering/build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/docvalues/solr/contrib/clustering/build.xml Fri May 13 11:18:19 2011
@@ -118,6 +118,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
Modified: lucene/dev/branches/docvalues/solr/contrib/dataimporthandler/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/dataimporthandler/build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/dataimporthandler/build.xml (original)
+++ lucene/dev/branches/docvalues/solr/contrib/dataimporthandler/build.xml Fri May 13 11:18:19 2011
@@ -171,6 +171,7 @@
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
<sysproperty key="tests.multiplier" value="${tests.multiplier}"/>
@@ -231,6 +232,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
Modified: lucene/dev/branches/docvalues/solr/contrib/extraction/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/extraction/build.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/extraction/build.xml (original)
+++ lucene/dev/branches/docvalues/solr/contrib/extraction/build.xml Fri May 13 11:18:19 2011
@@ -115,6 +115,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+ <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<sysproperty key="tests.locale" value="${tests.locale}"/>
<sysproperty key="tests.timezone" value="${tests.timezone}"/>
Modified: lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt Fri May 13 11:18:19 2011
@@ -28,6 +28,11 @@ Upgrading from Solr 3.1
It should move to UIMAUpdateRequestProcessorFactory setting.
See contrib/uima/README.txt for more details. (SOLR-2436)
+New Features
+----------------------
+
+* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
+
Test Cases:
----------------------