You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2008/05/09 14:04:47 UTC
svn commit: r654774 - in /lucene/java/trunk/src:
java/org/apache/lucene/index/ test/org/apache/lucene/index/
test/org/apache/lucene/util/
Author: mikemccand
Date: Fri May 9 05:04:46 2008
New Revision: 654774
URL: http://svn.apache.org/viewvc?rev=654774&view=rev
Log:
LUCENE-510: fix backwards compatibility bug when bulk-merging stored fields from pre-UTF8 segments that contain non-ascii stored fields
Modified:
lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/java/trunk/src/test/org/apache/lucene/index/index.23.cfs.zip
lucene/java/trunk/src/test/org/apache/lucene/index/index.23.nocfs.zip
lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Fri May 9 05:04:46 2008
@@ -172,6 +172,10 @@
indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
}
+ boolean canReadRawDocs() {
+ return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+ }
+
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
seekIndex(n);
long position = indexStream.readLong();
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Fri May 9 05:04:46 2008
@@ -302,8 +302,14 @@
final FieldsReader matchingFieldsReader;
final boolean hasMatchingReader;
if (matchingSegmentReader != null) {
- hasMatchingReader = true;
- matchingFieldsReader = matchingSegmentReader.getFieldsReader();
+ final FieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
+ if (fieldsReader != null && !fieldsReader.canReadRawDocs()) {
+ matchingFieldsReader = null;
+ hasMatchingReader = false;
+ } else {
+ matchingFieldsReader = fieldsReader;
+ hasMatchingReader = true;
+ }
} else {
hasMatchingReader = false;
matchingFieldsReader = null;
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Fri May 9 05:04:46 2008
@@ -129,6 +129,22 @@
"23.nocfs",
};
+ public void testOptimizeOldIndex() throws IOException {
+ for(int i=0;i<oldNames.length;i++) {
+ String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+ unzip(dirName, oldNames[i]);
+ String fullPath = fullDir(oldNames[i]);
+ Directory dir = FSDirectory.getDirectory(fullPath);
+ IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+ w.optimize();
+ w.close();
+
+ _TestUtil.checkIndex(dir);
+ dir.close();
+ rmDir(oldNames[i]);
+ }
+ }
+
public void testSearchOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
@@ -190,12 +206,15 @@
Document d = reader.document(i);
List fields = d.getFields();
if (oldName.startsWith("23.")) {
- assertEquals(3, fields.size());
+ assertEquals(4, fields.size());
Field f = (Field) d.getField("id");
assertEquals(""+i, f.stringValue());
f = (Field) d.getField("utf8");
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
+
+ f = (Field) d.getField("autf8");
+ assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
f = (Field) d.getField("content2");
assertEquals("here is more content with aaa aaa aaa", f.stringValue());
@@ -214,7 +233,11 @@
testHits(hits, 34, searcher.getIndexReader());
- if (oldName.startsWith("23.")) {
+ if (!oldName.startsWith("19.") &&
+ !oldName.startsWith("20.") &&
+ !oldName.startsWith("21.") &&
+ !oldName.startsWith("22.")) {
+ // Test on indices >= 2.3
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")));
assertEquals(34, hits.length());
hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")));
@@ -455,6 +478,7 @@
Document doc = new Document();
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/index.23.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/index.23.cfs.zip?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/index.23.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/index.23.nocfs.zip?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java?rev=654774&r1=654773&r2=654774&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java Fri May 9 05:04:46 2008
@@ -54,13 +54,16 @@
((ConcurrentMergeScheduler) ms).sync();
}
+ /** This runs the CheckIndex tool on the index in the given
+ * directory. If any issues are hit, a RuntimeException is
+ * thrown; else, true is returned. */
public static boolean checkIndex(Directory dir) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex.out = new PrintStream(bos);
if (!CheckIndex.check(dir, false, null)) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString());
- return false;
+ throw new RuntimeException("CheckIndex failed");
} else
return true;
}