You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/31 16:07:51 UTC
svn commit: r1441005 - in /lucene/dev/branches/lucene4547/lucene/core/src:
java/org/apache/lucene/codecs/ java/org/apache/lucene/index/
java/org/apache/lucene/util/ test/org/apache/lucene/codecs/perfield/
test/org/apache/lucene/index/
Author: rmuir
Date: Thu Jan 31 15:07:51 2013
New Revision: 1441005
URL: http://svn.apache.org/viewvc?rev=1441005&view=rev
Log:
Clear some nocommit markers and clean up the sorted-bytes merge code a bit.
Modified:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/Test2BSortedDocValues.java
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java?rev=1441005&r1=1441004&r2=1441005&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java Thu Jan 31 15:07:51 2013
@@ -309,8 +309,8 @@ public abstract class DocValuesConsumer
}
}
- // nocommit we can unload the bits to disk to reduce
- // transient ram spike...
+ // TODO: we can unload the bits/packed ints to disk to reduce
+ // transient ram spike... most of these just require iterators
}
// Second pass: merge only the live terms
@@ -337,7 +337,11 @@ public abstract class DocValuesConsumer
lastOrds[readerId] = sourceOrd;
top.ordDeltas.add(delta);
- lastTerm = BytesRef.deepCopyOf(top.scratch);
+ if (lastTerm == null) {
+ lastTerm = BytesRef.deepCopyOf(top.scratch);
+ } else {
+ lastTerm.copyBytes(top.scratch);
+ }
ord++;
}
@@ -360,28 +364,6 @@ public abstract class DocValuesConsumer
state.liveTerms = null;
}
}
-
- /*
- public void finish(SortedDocValuesConsumer consumer) throws IOException {
-
- // Third pass: write merged result
- for(BytesRef term : mergedTerms) {
- consumer.addValue(term);
- }
-
- for(SegmentState segState : segStates) {
- Bits liveDocs = segState.reader.getLiveDocs();
- int maxDoc = segState.reader.maxDoc();
- for(int docID=0;docID<maxDoc;docID++) {
- if (liveDocs == null || liveDocs.get(docID)) {
- int segOrd = segState.values.getOrd(docID);
- int mergedOrd = segState.segOrdToMergedOrd[segOrd];
- consumer.addDoc(mergedOrd);
- }
- }
- }
- }
- */
}
/**
@@ -472,7 +454,7 @@ public abstract class DocValuesConsumer
}
assert nextIsSet;
nextIsSet = false;
- // nocommit make a mutable number
+ // TODO make a mutable number
return nextValue;
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1441005&r1=1441004&r2=1441005&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Thu Jan 31 15:07:51 2013
@@ -763,8 +763,6 @@ public class IndexWriter implements Clos
private FieldNumbers getFieldNumberMap() throws IOException {
final FieldNumbers map = new FieldNumbers();
- // nocommit for a 4.0 index that has inconsistent DV
- // types ... this will throw exc on init of IW?
for(SegmentInfoPerCommit info : segmentInfos) {
for(FieldInfo fi : getFieldInfos(info.info)) {
map.addOrGet(fi.name, fi.number, fi.getDocValuesType());
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java?rev=1441005&r1=1441004&r2=1441005&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java Thu Jan 31 15:07:51 2013
@@ -30,7 +30,8 @@ import org.apache.lucene.store.IndexInpu
*
* @lucene.internal
**/
-// nocommit: make this simply a big ass array and nothing more.
+// TODO: refactor this, byteblockpool, fst.bytestore, and any
+// other "shift/mask big arrays". there are too many of these classes!
public final class PagedBytes {
private final List<byte[]> blocks = new ArrayList<byte[]>();
private final List<Integer> blockEnd = new ArrayList<Integer>();
@@ -106,7 +107,7 @@ public final class PagedBytes {
*
* @lucene.internal
**/
- // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+ // TODO: this really needs to be refactored into fieldcacheimpl
public void fill(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
@@ -217,7 +218,7 @@ public final class PagedBytes {
/** Copy bytes in, writing the length as a 1 or 2 byte
* vInt prefix. */
- // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+ // TODO: this really needs to be refactored into fieldcacheimpl!
public long copyUsingLengthPrefix(BytesRef bytes) {
if (bytes.length >= 32768) {
throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java?rev=1441005&r1=1441004&r2=1441005&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java Thu Jan 31 15:07:51 2013
@@ -65,8 +65,8 @@ public class TestPerFieldDocValuesFormat
}
// just a simple trivial test
- // nocommit: if we are going to pass down suffixes to segmentread/writestate,
- // then they should be respected by *all* codec apis!
+ // TODO: we should come up with a test that somehow checks that segment suffix
+ // is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/Test2BSortedDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/Test2BSortedDocValues.java?rev=1441005&r1=1441004&r2=1441005&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/Test2BSortedDocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/Test2BSortedDocValues.java Thu Jan 31 15:07:51 2013
@@ -17,13 +17,12 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.util.Random;
+
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.store.BaseDirectoryWrapper;
-import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -34,7 +33,7 @@ import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@TimeoutSuite(millis = 80 * TimeUnits.HOUR)
-@Ignore("takes ?? minutes")
+@Ignore("very slow")
public class Test2BSortedDocValues extends LuceneTestCase {
// indexes Integer.MAX_VALUE docs with a fixed binary field
@@ -94,10 +93,7 @@ public class Test2BSortedDocValues exten
}
// indexes Integer.MAX_VALUE docs with a fixed binary field
- // nocommit: this must be some kind of worst case for BytesRefHash / its hash fn...
- // or there is some other perf bug...VERY slow!
- // if you cut this test to use random.nextBytes its much faster, but still quite slow...
- // and its not unrealistic for users to index something thats already in sorted order?
+ // TODO: must use random.nextBytes (like Test2BTerms) to avoid BytesRefHash probing issues
public void test2BOrds() throws Exception {
BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BOrds"));
if (dir instanceof MockDirectoryWrapper) {
@@ -118,11 +114,11 @@ public class Test2BSortedDocValues exten
SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
doc.add(dvField);
+ long seed = random().nextLong();
+ Random random = new Random(seed);
+
for (int i = 0; i < Integer.MAX_VALUE; i++) {
- bytes[0] = (byte)(i >> 24);
- bytes[1] = (byte)(i >> 16);
- bytes[2] = (byte)(i >> 8);
- bytes[3] = (byte) i;
+ random.nextBytes(bytes);
w.addDocument(doc);
if (i % 100000 == 0) {
System.out.println("indexed: " + i);
@@ -137,19 +133,15 @@ public class Test2BSortedDocValues exten
System.out.flush();
DirectoryReader r = DirectoryReader.open(dir);
- int expectedValue = 0;
+ random.setSeed(seed);
for (AtomicReaderContext context : r.leaves()) {
AtomicReader reader = context.reader();
BytesRef scratch = new BytesRef();
BinaryDocValues dv = reader.getSortedDocValues("dv");
for (int i = 0; i < reader.maxDoc(); i++) {
- bytes[0] = (byte)(expectedValue >> 24);
- bytes[1] = (byte)(expectedValue >> 16);
- bytes[2] = (byte)(expectedValue >> 8);
- bytes[3] = (byte) expectedValue;
+ random.nextBytes(bytes);
dv.get(i, scratch);
assertEquals(data, scratch);
- expectedValue++;
}
}