You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/04/10 09:25:43 UTC

svn commit: r1586232 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/ lucene/core/src/java/org/apache/lucene/codecs/compressing/ lucene/core/src/java/org/apache/lucene/store/

Author: jpountz
Date: Thu Apr 10 07:25:42 2014
New Revision: 1586232

URL: http://svn.apache.org/r1586232
Log:
LUCENE-5583: Add DataInput.skipBytes, ChecksumIndexInput can now seek forward.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/DataInput.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Apr 10 07:25:42 2014
@@ -95,6 +95,9 @@ New Features
 * LUCENE-5580: Checksums are automatically verified on the default stored
   fields format when performing a bulk merge. (Adrien Grand)
 
+* LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but
+  only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler)
+
 API Changes
 
 * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java Thu Apr 10 07:25:42 2014
@@ -264,13 +264,7 @@ public final class CodecUtil {
     clone.seek(0);
     ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
     assert in.getFilePointer() == 0;
-    final byte[] buffer = new byte[1024];
-    long bytesToRead = in.length() - footerLength();
-    for (long skipped = 0; skipped < bytesToRead; ) {
-      final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
-      in.readBytes(buffer, 0, toRead);
-      skipped += toRead;
-    }
+    in.seek(in.length() - footerLength());
     return checkFooter(in);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Thu Apr 10 07:25:42 2014
@@ -70,18 +70,6 @@ public final class CompressingStoredFiel
   // Do not reuse the decompression buffer when there is more than 32kb to decompress
   private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
 
-  private static final byte[] SKIP_BUFFER = new byte[1024];
-
-  // TODO: should this be a method on DataInput?
-  private static void skipBytes(DataInput in, long numBytes) throws IOException {
-    assert numBytes >= 0;
-    for (long skipped = 0; skipped < numBytes; ) {
-      final int toRead = (int) Math.min(numBytes - skipped, SKIP_BUFFER.length);
-      in.readBytes(SKIP_BUFFER, 0, toRead);
-      skipped += toRead;
-    }
-  }
-
   private final int version;
   private final FieldInfos fieldInfos;
   private final CompressingStoredFieldsIndexReader indexReader;
@@ -223,7 +211,7 @@ public final class CompressingStoredFiel
       case BYTE_ARR:
       case STRING:
         final int length = in.readVInt();
-        skipBytes(in, length);
+        in.skipBytes(length);
         break;
       case NUMERIC_INT:
       case NUMERIC_FLOAT:
@@ -416,24 +404,7 @@ public final class CompressingStoredFiel
 
       IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
       in.seek(0);
-      fieldsStream = new BufferedChecksumIndexInput(in) {
-
-        final byte[] skipBuffer = new byte[256];
-
-        @Override
-        public void seek(long target) throws IOException {
-          final long skip = target - getFilePointer();
-          if (skip < 0) {
-            throw new IllegalStateException("Seeking backward on merge: " + skip);
-          }
-          for (long skipped = 0; skipped < skip; ) {
-            final int step = (int) Math.min(skipBuffer.length, skip - skipped);
-            readBytes(skipBuffer, 0, step);
-            skipped += step;
-          }
-        }
-
-      };
+      fieldsStream = new BufferedChecksumIndexInput(in);
       fieldsStream.seek(indexReader.getStartPointer(startDocId));
     }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ByteArrayDataInput.java Thu Apr 10 07:25:42 2014
@@ -75,7 +75,8 @@ public final class ByteArrayDataInput ex
     return pos == limit;
   }
 
-  public void skipBytes(int count) {
+  @Override
+  public void skipBytes(long count) {
     pos += count;
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java Thu Apr 10 07:25:42 2014
@@ -35,8 +35,19 @@ public abstract class ChecksumIndexInput
   /** Returns the current checksum value */
   public abstract long getChecksum() throws IOException;
 
+  /**
+   * {@inheritDoc}
+   *
+   * {@link ChecksumIndexInput} can only seek forward and seeks are expensive
+   * since they imply to read bytes in-between the current position and the
+   * target position in order to update the checksum.
+   */
   @Override
   public void seek(long pos) throws IOException {
-    throw new UnsupportedOperationException();
+    final long skip = pos - getFilePointer();
+    if (skip < 0) {
+      throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward");
+    }
+    skipBytes(skip);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/DataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/DataInput.java?rev=1586232&r1=1586231&r2=1586232&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/DataInput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/DataInput.java Thu Apr 10 07:25:42 2014
@@ -36,6 +36,19 @@ import java.util.Set;
  * resource, but positioned independently.
  */
 public abstract class DataInput implements Cloneable {
+
+  private static final int SKIP_BUFFER_SIZE = 1024;
+
+  /* This buffer is used to skip over bytes with the default implementation of
+   * skipBytes. The reason why we need to use an instance member instead of
+   * sharing a single instance across threads is that some delegating
+   * implementations of DataInput might want to reuse the provided buffer in
+   * order to eg. update the checksum. If we shared the same buffer across
+   * threads, then another thread might update the buffer while the checksum is
+   * being computed, making it invalid. See LUCENE-5583 for more information.
+   */
+  private byte[] skipBuffer;
+
   /** Reads and returns a single byte.
    * @see DataOutput#writeByte(byte)
    */
@@ -233,4 +246,26 @@ public abstract class DataInput implemen
 
     return set;
   }
+
+  /**
+   * Skip over <code>numBytes</code> bytes. The contract on this method is that it
+   * should have the same behavior as reading the same number of bytes into a
+   * buffer and discarding its content. Negative values of <code>numBytes</code>
+   * are not supported.
+   */
+  public void skipBytes(final long numBytes) throws IOException {
+    if (numBytes < 0) {
+      throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
+    }
+    if (skipBuffer == null) {
+      skipBuffer = new byte[SKIP_BUFFER_SIZE];
+    }
+    assert skipBuffer.length == SKIP_BUFFER_SIZE;
+    for (long skipped = 0; skipped < numBytes; ) {
+      final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
+      readBytes(skipBuffer, 0, step, false);
+      skipped += step;
+    }
+  }
+
 }