You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/29 21:28:52 UTC
svn commit: r885265 [2/2] - in /lucene/java/branches/flex_1458/src:
java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/
java/org/apache/lucene/index/codecs/intblock/
java/org/apache/lucene/index/codecs/preflex/ java/org/apache/lucene/ind...
Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=885265&r1=885264&r2=885265&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java Sun Nov 29 20:28:51 2009
@@ -23,11 +23,13 @@
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
+import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Set;
+import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -542,13 +544,12 @@
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
- char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8-1];
+ char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8];
Arrays.fill(chars, 'x');
Document doc = new Document();
final String bigTerm = new String(chars);
- // Max length term is 16383, so this contents produces
- // a too-long term:
+ // This produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
@@ -4611,4 +4612,143 @@
_TestUtil.checkIndex(dir);
dir.close();
}
+
+ // Returns a random int in [start, end]; both start & end are inclusive
+ private final int getInt(Random r, int start, int end) {
+ return start + r.nextInt(1+end-start); // nextInt's bound is exclusive, hence the 1+
+ }
+
+ private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException { // walks all terms of field "f": checks ordering, membership in allTerms, then exact seeks
+ TermsEnum terms = r.fields().terms("f").iterator();
+
+ char[] last = new char[2]; // UTF-16 code units of the previously visited term
+ int lastLength = 0; // 0 until the first term has been visited
+
+ Set<String> seenTerms = new HashSet<String>(); // every term the enum returned, as a String
+
+ UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
+ while(true) {
+ final TermRef term = terms.next();
+ if (term == null) { // enum exhausted
+ break;
+ }
+ UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
+ assertTrue(utf16.length <= 2); // the copy into the 2-element 'last' buffer below relies on this
+
+ // Make sure last term comes before current one, in
+ // UTF16 sort order (compared code unit by code unit)
+ int i = 0;
+ for(i=0;i<lastLength && i<utf16.length;i++) {
+ assertTrue("UTF16 code unit " + termDesc(new String(utf16.result, 0, utf16.length)) + " incorrectly sorted after code unit " + termDesc(new String(last, 0, lastLength)), last[i] <= utf16.result[i]);
+ if (last[i] < utf16.result[i]) { // first differing code unit decides the order
+ break;
+ }
+ }
+ // Terms should not have been identical: either the lengths differ or the loop stopped early at a differing unit
+ assertTrue(lastLength != utf16.length || i < lastLength);
+
+ final String s = new String(utf16.result, 0, utf16.length);
+ assertTrue("term " + termDesc(s) + " was not added to index (count=" + allTerms.size() + ")", allTerms.contains(s));
+ seenTerms.add(s);
+
+ System.arraycopy(utf16.result, 0, last, 0, utf16.length); // remember this term for the next comparison
+ lastLength = utf16.length;
+ }
+
+ if (isTop) { // when isTop, the enum must have produced exactly the terms in allTerms
+ assertTrue(allTerms.equals(seenTerms));
+ }
+
+ // Test seeking: every term seen above must be reported as FOUND by an exact seek
+ Iterator<String> it = seenTerms.iterator();
+ while(it.hasNext()) {
+ TermRef tr = new TermRef(it.next());
+ assertEquals("seek failed for term=" + termDesc(tr.toString()), // NOTE(review): termDesc asserts length <= 2; assumes TermRef.toString() yields the term text -- confirm
+ TermsEnum.SeekStatus.FOUND,
+ terms.seek(tr));
+ }
+ }
+
+ private final String asUnicodeChar(char c) { // formats one UTF-16 code unit for assertion messages
+ return "U+" + Integer.toHexString(c); // lowercase hex without zero padding, e.g. U+41 or U+d800
+ }
+
+ private final String termDesc(String s) { // describes a term of at most 2 chars as its code units, comma-separated
+ final String s0;
+ assertTrue(s.length() <= 2); // longer strings are rejected outright
+ if (s.length() == 1) {
+ s0 = asUnicodeChar(s.charAt(0));
+ } else { // assumes length 2; an empty string would also land here and charAt would throw
+ s0 = asUnicodeChar(s.charAt(0)) + "," + asUnicodeChar(s.charAt(1));
+ }
+ return s0;
+ }
+
+ // Make sure terms, including ones with surrogate pairs,
+ // sort in UTF16 sort order by default
+ public void testTermUTF16SortOrder() throws Throwable {
+ Directory dir = new MockRAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ Document d = new Document();
+ // One reusable document with a single un-analyzed field "f"; its value is replaced for each term below
+ Field f = new Field("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
+ d.add(f);
+ char[] chars = new char[2]; // scratch buffer: one code unit, or a high+low surrogate pair
+ Random rnd = newRandom();
+ final Set<String> allTerms = new HashSet<String>(); // every distinct term added to the index
+
+ for(int i=0;i<200;i++) { // add 200 random terms; the same term may be drawn more than once
+
+ final String s;
+ if (rnd.nextBoolean()) {
+ // Single char
+ if (rnd.nextBoolean()) {
+ // Above surrogates
+ chars[0] = (char) getInt(rnd, 1+UnicodeUtil.UNI_SUR_LOW_END, 0xffff);
+ } else {
+ // Below surrogates
+ chars[0] = (char) getInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START-1);
+ }
+ s = new String(chars, 0, 1);
+ } else {
+ // Surrogate pair
+ chars[0] = (char) getInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END);
+ assertTrue(((int) chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int) chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END);
+ chars[1] = (char) getInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END);
+ s = new String(chars, 0, 2);
+ }
+ allTerms.add(s);
+ f.setValue(s);
+
+ //System.out.println("add " + termDesc(s));
+ writer.addDocument(d);
+
+ if ((1+i) % 42 == 0) { // commit after every 42nd document -- presumably to produce several segments
+ writer.commit();
+ }
+ }
+
+ IndexReader r = writer.getReader();
+
+ // Test each sub-segment
+ final IndexReader[] subs = r.getSequentialSubReaders();
+ assertEquals(5, subs.length); // presumably 4 commits (after 42, 84, 126, 168 docs) plus the uncommitted remainder; assumes no merging -- confirm
+ for(int i=0;i<subs.length;i++) {
+ checkTermsOrder(subs[i], allTerms, false); // a single sub-reader need not contain every term
+ }
+ checkTermsOrder(r, allTerms, true); // multi-segment reader: must expose exactly allTerms
+
+ // Done with the multi-segment reader
+ r.close();
+
+ writer.optimize();
+
+ // Test optimized single segment
+ r = writer.getReader();
+ checkTermsOrder(r, allTerms, true);
+ r.close();
+
+ writer.close();
+ dir.close();
+ }
}