You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/01/13 16:49:36 UTC
svn commit: r1432646 - in /lucene/dev/trunk/lucene:
analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/
core/src/java/org/apache/lucene/util/fst/
core/src/test/org/apache/lucene/util/fst/
Author: mikemccand
Date: Sun Jan 13 15:49:35 2013
New Revision: 1432646
URL: http://svn.apache.org/viewvc?rev=1432646&view=rev
Log:
LUCENE-4678: add BytesStore.truncate and use it to not double-buffer during pack
Modified:
lucene/dev/trunk/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary%24fst.dat?rev=1432646&r1=1432645&r2=1432646&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java?rev=1432646&r1=1432645&r2=1432646&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java Sun Jan 13 15:49:35 2013
@@ -69,6 +69,14 @@ class BytesStore extends DataOutput {
nextWrite = blocks.get(blocks.size()-1).length;
}
+ /** Absolute write byte; you must ensure dest is < max
+ * position written so far. */
+ public void writeByte(int dest, byte b) {
+ int blockIndex = dest >> blockBits;
+ byte[] block = blocks.get(blockIndex);
+ block[dest & blockMask] = b;
+ }
+
@Override
public void writeByte(byte b) {
if (nextWrite == blockSize) {
@@ -237,9 +245,10 @@ class BytesStore extends DataOutput {
}
}
- /** Reverse the last numBytes. */
+ /** Reverse from srcPos, inclusive, to destPos, inclusive. */
public void reverse(int srcPos, int destPos) {
assert srcPos < destPos;
+ assert destPos < getPosition();
//System.out.println("reverse src=" + srcPos + " dest=" + destPos);
int srcBlockIndex = srcPos >> blockBits;
@@ -275,7 +284,7 @@ class BytesStore extends DataOutput {
}
}
- public void skip(int len) {
+ public void skipBytes(int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
@@ -294,6 +303,26 @@ class BytesStore extends DataOutput {
return (blocks.size()-1) * blockSize + nextWrite;
}
+ /** Pos must be less than the max position written so far!
+ * Ie, you cannot "grow" the file with this! */
+ public void truncate(int newLen) {
+ assert newLen <= getPosition();
+ assert newLen >= 0;
+ int blockIndex = newLen >> blockBits;
+ nextWrite = newLen & blockMask;
+ if (nextWrite == 0) {
+ blockIndex--;
+ nextWrite = blockSize;
+ }
+ blocks.subList(blockIndex+1, blocks.size()).clear();
+ if (newLen == 0) {
+ current = null;
+ } else {
+ current = blocks.get(blockIndex);
+ }
+ assert newLen == getPosition();
+ }
+
public void finish() {
if (current != null) {
byte[] lastBuffer = new byte[nextWrite];
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1432646&r1=1432645&r2=1432646&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Sun Jan 13 15:49:35 2013
@@ -730,7 +730,7 @@ public final class FST<T> {
int destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
assert destPos >= srcPos;
if (destPos > srcPos) {
- bytes.skip(destPos - srcPos);
+ bytes.skipBytes(destPos - srcPos);
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
destPos -= maxBytesPerArc;
srcPos -= bytesPerArc[arcIdx];
@@ -1444,9 +1444,6 @@ public final class FST<T> {
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
}
- final RAMOutputStream buffer = new RAMOutputStream();
- byte[] bufferBytes = new byte[64];
-
Arc<T> arc = new Arc<T>();
final BytesReader r = getBytesReader(0);
@@ -1553,7 +1550,7 @@ public final class FST<T> {
// this is an array'd node and bytesPerArc changes:
writeNode:
while(true) { // retry writing this node
- assert buffer.getFilePointer() == 0;
+
//System.out.println(" cycle: retry");
readFirstRealTargetArc(node, arc, r);
@@ -1563,9 +1560,9 @@ public final class FST<T> {
if (bytesPerArc == 0) {
bytesPerArc = arc.bytesPerArc;
}
- buffer.writeByte(ARCS_AS_FIXED_ARRAY);
- buffer.writeVInt(arc.numArcs);
- buffer.writeVInt(bytesPerArc);
+ writer.writeByte(ARCS_AS_FIXED_ARRAY);
+ writer.writeVInt(arc.numArcs);
+ writer.writeVInt(bytesPerArc);
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
}
@@ -1574,7 +1571,7 @@ public final class FST<T> {
while(true) { // iterate over all arcs for this node
//System.out.println(" cycle next arc");
- final int arcStartPos = (int) buffer.getFilePointer();
+ final int arcStartPos = writer.getPosition();
nodeArcCount++;
byte flags = 0;
@@ -1621,7 +1618,7 @@ public final class FST<T> {
absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
}
- int delta = (int) (newNodeAddress.get(arc.target) + addressError - buffer.getFilePointer() - address - 2);
+ int delta = (int) (newNodeAddress.get(arc.target) + addressError - writer.getPosition() - 2);
if (delta < 0) {
//System.out.println("neg: " + delta);
anyNegDelta = true;
@@ -1637,23 +1634,23 @@ public final class FST<T> {
}
assert flags != ARCS_AS_FIXED_ARRAY;
- buffer.writeByte(flags);
+ writer.writeByte(flags);
- fst.writeLabel(buffer, arc.label);
+ fst.writeLabel(writer, arc.label);
if (arc.output != NO_OUTPUT) {
- outputs.write(arc.output, buffer);
+ outputs.write(arc.output, writer);
if (!retry) {
fst.arcWithOutputCount++;
}
}
if (arc.nextFinalOutput != NO_OUTPUT) {
- outputs.writeFinalOutput(arc.nextFinalOutput, buffer);
+ outputs.writeFinalOutput(arc.nextFinalOutput, writer);
}
if (doWriteTarget) {
- int delta = (int) (newNodeAddress.get(arc.target) + addressError - buffer.getFilePointer() - address);
+ int delta = (int) (newNodeAddress.get(arc.target) + addressError - writer.getPosition());
if (delta < 0) {
anyNegDelta = true;
//System.out.println("neg: " + delta);
@@ -1662,7 +1659,7 @@ public final class FST<T> {
if (flag(flags, BIT_TARGET_DELTA)) {
//System.out.println(" delta");
- buffer.writeVInt(delta);
+ writer.writeVInt(delta);
if (!retry) {
deltaCount++;
}
@@ -1674,7 +1671,7 @@ public final class FST<T> {
System.out.println(" abs");
}
*/
- buffer.writeVInt(absPtr);
+ writer.writeVInt(absPtr);
if (!retry) {
if (absPtr >= topNodeMap.size()) {
absCount++;
@@ -1686,7 +1683,7 @@ public final class FST<T> {
}
if (useArcArray) {
- final int arcBytes = (int) (buffer.getFilePointer() - arcStartPos);
+ final int arcBytes = writer.getPosition() - arcStartPos;
//System.out.println(" " + arcBytes + " bytes");
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
// NOTE: this may in fact go "backwards", if
@@ -1696,11 +1693,7 @@ public final class FST<T> {
// will retry (below) so it's OK to ovewrite
// bytes:
//wasted += bytesPerArc - arcBytes;
- int skip = (int) (arcStartPos + bytesPerArc - buffer.getFilePointer());
- while(skip > 0) {
- buffer.writeByte((byte) 0);
- skip--;
- }
+ writer.skipBytes(arcStartPos + bytesPerArc - writer.getPosition());
}
if (arc.isLast()) {
@@ -1725,19 +1718,12 @@ public final class FST<T> {
// Retry:
bytesPerArc = maxBytesPerArc;
- buffer.reset();
+ writer.truncate(address);
nodeArcCount = 0;
retry = true;
anyNegDelta = false;
}
- if (bufferBytes.length < (int) buffer.getFilePointer()) {
- bufferBytes = ArrayUtil.grow(bufferBytes, (int) buffer.getFilePointer());
- }
- buffer.writeTo(bufferBytes, 0);
- writer.writeBytes(bufferBytes, 0, (int) buffer.getFilePointer());
- buffer.reset();
-
negDelta |= anyNegDelta;
fst.arcCount += nodeArcCount;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java?rev=1432646&r1=1432645&r2=1432646&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java Sun Jan 13 15:49:35 2013
@@ -42,7 +42,7 @@ public class TestBytesStore extends Luce
int pos = 0;
while(pos < numBytes) {
- int op = random().nextInt(7);
+ int op = random().nextInt(8);
if (VERBOSE) {
System.out.println(" cycle pos=" + pos);
}
@@ -97,21 +97,21 @@ public class TestBytesStore extends Luce
case 3:
{
// reverse bytes
- if (pos > 0) {
- int len = _TestUtil.nextInt(random(), 1, Math.min(100, pos));
+ if (pos > 1) {
+ int len = _TestUtil.nextInt(random(), 2, Math.min(100, pos));
int start;
if (len == pos) {
start = 0;
} else {
start = random().nextInt(pos - len);
}
- int end = start + len;
+ int end = start + len - 1;
if (VERBOSE) {
- System.out.println(" reverse start=" + start + " end=" + end + " len=" + len);
+ System.out.println(" reverse start=" + start + " end=" + end + " len=" + len + " pos=" + pos);
}
bytes.reverse(start, end);
- while(start < end) {
+ while(start <= end) {
byte b = expected[end];
expected[end] = expected[start];
expected[start] = b;
@@ -159,17 +159,49 @@ public class TestBytesStore extends Luce
{
// skip
int len = random().nextInt(Math.min(100, numBytes - pos));
- pos += len;
- bytes.skip(len);
+
if (VERBOSE) {
System.out.println(" skip len=" + len);
}
+
+ pos += len;
+ bytes.skipBytes(len);
+
+ // NOTE: must fill in zeros in case truncate was
+ // used, else we get false fails:
+ if (len > 0) {
+ byte[] zeros = new byte[len];
+ bytes.writeBytes(pos-len, zeros, 0, len);
+ }
}
break;
+
+ case 7:
+ {
+ // absWriteByte
+ if (pos > 0) {
+ int dest = random().nextInt(pos);
+ byte b = (byte) random().nextInt(256);
+ expected[dest] = b;
+ bytes.writeByte(dest, b);
+ }
+ break;
+ }
}
assertEquals(pos, bytes.getPosition());
+ if (pos > 0 && random().nextInt(50) == 17) {
+ // truncate
+ int len = _TestUtil.nextInt(random(), 1, Math.min(pos, 100));
+ bytes.truncate(pos - len);
+ pos -= len;
+ Arrays.fill(expected, pos, pos+len, (byte) 0);
+ if (VERBOSE) {
+ System.out.println(" truncate len=" + len + " newPos=" + pos);
+ }
+ }
+
if ((pos > 0 && random().nextInt(200) == 17)) {
verify(bytes, expected, pos);
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1432646&r1=1432645&r2=1432646&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Sun Jan 13 15:49:35 2013
@@ -483,11 +483,14 @@ public class TestFSTs extends LuceneTest
break;
}
}
- long t = System.currentTimeMillis() - tStart;
- System.out.println((t / 1000.0) + " sec to build");
+
+ long tMid = System.currentTimeMillis();
+ System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
assert builder.getTermCount() == ord;
FST<T> fst = builder.finish();
+ long tEnd = System.currentTimeMillis();
+ System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);