You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sis.apache.org by de...@apache.org on 2022/12/22 16:19:31 UTC

[sis] 01/02: Allow GeoTIFF reader to specify in advance the desired range of bytes.

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 552bed2adbb19ed421001934fa0ea3b5b73c4d65
Author: Martin Desruisseaux <ma...@geomatys.com>
AuthorDate: Thu Dec 22 15:37:48 2022 +0100

    Allow GeoTIFF reader to specify in advance the desired range of bytes.
---
 .../apache/sis/cloud/aws/s3/CachedByteChannel.java | 13 +++---
 .../storage/inflater/CompressionChannel.java       |  3 +-
 .../sis/internal/storage/io/ChannelDataInput.java  | 20 ++++++--
 .../internal/storage/io/FileCacheByteChannel.java  | 54 +++++++++-------------
 .../storage/io/FileCacheByteChannelTest.java       | 12 +++--
 5 files changed, 55 insertions(+), 47 deletions(-)

diff --git a/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java b/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java
index 135e2ba08f..dc7800160a 100644
--- a/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java
+++ b/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java
@@ -17,7 +17,6 @@
 package org.apache.sis.cloud.aws.s3;
 
 import java.util.List;
-import java.util.OptionalLong;
 import java.io.IOException;
 import java.io.InputStream;
 import org.apache.sis.internal.storage.io.FileCacheByteChannel;
@@ -93,13 +92,15 @@ final class CachedByteChannel extends FileCacheByteChannel {
         } catch (SdkException e) {
             throw FileService.failure(path, e);
         }
-        final List<String> arl = (acceptRanges != null) ? List.of(acceptRanges) : List.of();
+        final List<String> rangeUnits = (acceptRanges != null) ? List.of(acceptRanges) : List.of();
+        final long length = (contentLength != null) ? contentLength : -1;
         if (contentRange == null) {
-            final long length = (contentLength != null) ? contentLength : -1;
-            return new Connection(stream, 0, (length < 0) ? Long.MAX_VALUE : length, length, Connection.acceptRanges(arl));
+            return new Connection(stream, 0, (length < 0) ? Long.MAX_VALUE : length, length, Connection.acceptRanges(rangeUnits));
+        } else try {
+            return new Connection(stream, contentRange, length, rangeUnits);
+        } catch (IllegalArgumentException e) {
+            throw new IOException(e);
         }
-        return new Connection(stream, contentRange, arl,
-                (contentLength != null) ? OptionalLong.of(contentLength) : OptionalLong.empty());
     }
 
     /**
diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java
index 48c2a936bd..ef54e3b7a1 100644
--- a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java
+++ b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java
@@ -33,7 +33,7 @@ import org.apache.sis.storage.event.StoreListeners;
  * <p>The {@link #close()} method shall be invoked when this channel is no longer used.</p>
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 1.3
+ * @version 1.4
  * @since   1.1
  * @module
  */
@@ -83,6 +83,7 @@ abstract class CompressionChannel extends PixelChannel {
     public void setInputRegion(final long start, final long byteCount) throws IOException {
         endPosition = Math.addExact(start, byteCount);
         input.seek(start);
+        input.endOfInterest(endPosition);
     }
 
     /**
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
index 45802f35ff..53c6ef2778 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
@@ -916,9 +916,7 @@ public class ChannelDataInput extends ChannelData {
         } else if ((p < 0 || p - buffer.limit() >= SEEK_THRESHOLD) && channel instanceof SeekableByteChannel) {
             /*
              * Requested position is outside the current limits of the buffer,
-             * but we can set the new position directly in the channel. Note
-             * that StorageConnector.rewind() needs the buffer content to be
-             * valid as a result of this seek, so we reload it immediately.
+             * but we can set the new position directly in the channel.
              */
             ((SeekableByteChannel) channel).position(Math.addExact(channelOffset, position));
             bufferOffset = position;
@@ -951,6 +949,22 @@ public class ChannelDataInput extends ChannelData {
         clearBitOffset();
     }
 
+    /**
+     * Specifies the position after the last byte which is expected to be read.
+     * This position is only a hint and may be ignored, depending on the channel.
+     * Reading more bytes than specified is okay, only potentially less efficient.
+     * A value ≤ {@linkplain #position() position} means to read until the end of stream.
+     *
+     * @param  position  position after the last desired byte,
+     *         or a value ≤ current position for reading until the end of stream.
+     */
+    public final void endOfInterest(final long position) {
+        if (channel instanceof FileCacheByteChannel) {
+            ((FileCacheByteChannel) channel).endOfInterest(position + channelOffset);
+            // Overflow is okay as value ≤ position means "read until end of stream".
+        }
+    }
+
     /**
      * Empties the buffer and reset the channel position at the beginning of the stream.
      * This method is similar to {@code seek(0)} except that the buffer content is discarded.
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java
index d118eba197..b40ac37e6c 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java
@@ -16,7 +16,6 @@
  */
 package org.apache.sis.internal.storage.io;
 
-import java.util.OptionalLong;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -42,7 +41,7 @@ import org.apache.sis.util.collection.RangeSet;
  *
  * <ul>
  *   <li>Bytes read from the input stream are cached in a temporary file for making backward seeks possible.</li>
- *   <li>The number of bytes of interest {@linkplain #position(long, long) can be specified}.
+ *   <li>The number of bytes of interest {@linkplain #endOfInterest(long) can be specified}.
  *       It makes possible to specify the range of bytes to download with HTTP connections.</li>
  *   <li>This implementation is thread-safe.</li>
  *   <li>Current implementation is read-only.</li>
@@ -96,19 +95,19 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
         /**
          * Creates information about a connection.
          *
-         * @param input         the input stream for reading the bytes.
-         * @param start         position of the first byte read by the input stream (inclusive).
-         * @param end           position of the last byte read by the input stream (inclusive).
-         * @param length        total length of the stream, or -1 is unknown.
-         * @param acceptRanges  whether connection can be created for ranges of bytes.
+         * @param input          the input stream for reading the bytes.
+         * @param start          position of the first byte read by the input stream (inclusive).
+         * @param end            position of the last byte read by the input stream (inclusive).
+         * @param contentLength  total length of the stream, or -1 if unknown.
+         * @param acceptRanges   whether connection can be created for ranges of bytes.
          *
          * @see #openConnection(long, long)
          */
-        public Connection(final InputStream input, final long start, final long end, final long length, final boolean acceptRanges) {
+        public Connection(final InputStream input, final long start, final long end, final long contentLength, final boolean acceptRanges) {
             this.input  = input;
             this.start  = start;
             this.end    = end;
-            this.length = length;
+            this.length = contentLength;
             this.acceptRanges = acceptRanges;
         }
 
@@ -119,12 +118,10 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
          * @param  input          the input stream for reading the bytes.
          * @param  contentRange   value of "Content-Range" in HTTP header.
          * @param  acceptRanges   value of "Accept-Ranges" in HTTP header.
-         * @param  contentLength  total length of the stream.
+         * @param  contentLength  total length of the stream, or -1 if unknown.
          * @throws IllegalArgumentException if the start, end of length cannot be parsed.
          */
-        public Connection(final InputStream input, String contentRange, final Iterable<String> acceptRanges,
-                          final OptionalLong contentLength)
-        {
+        public Connection(final InputStream input, String contentRange, long contentLength, final Iterable<String> acceptRanges) {
             this.input = input;
             contentRange = contentRange.trim();
             int s = contentRange.indexOf(' ');
@@ -133,14 +130,11 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
             }
             int rs = contentRange.indexOf('-', ++s);                    // Index of range separator.
             int ls = contentRange.indexOf('/', Math.max(s, rs+1));      // Index of length separator.
-            if (contentLength.isPresent()) {
-                length = contentLength.getAsLong();
-            } else if (ls >= 0) {
-                String t = contentRange.substring(ls+1).trim();
-                length = t.equals("*") ? -1 : Long.parseLong(t);
-            } else {
-                length = -1;
+            if (contentLength < 0 && ls >= 0) {
+                final String t = contentRange.substring(ls+1).trim();
+                if (!t.equals("*")) contentLength = Long.parseLong(t);
             }
+            length = contentLength;
             if (ls < 0) ls = contentRange.length();
             if (rs < 0) rs = ls;
             start = Long.parseLong(contentRange.substring(s, rs).trim());
@@ -220,7 +214,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
      * Position after the last requested byte, or ≤ {@linkplain #position} if unknown.
      * It can be used for specifying the range of bytes to download from an HTTP connection.
      *
-     * @see #position(long, long)
+     * @see #endOfInterest(long)
      */
     private long endOfInterest;
 
@@ -335,19 +329,15 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
     }
 
     /**
-     * Sets this channel's position together with the number of bytes to read.
+     * Specifies the position after the last byte which is expected to be read.
      * The number of bytes is only a hint and may be ignored, depending on subclasses.
      * Reading more bytes than specified is okay, only potentially less efficient.
+     * A value ≤ {@linkplain #position() position} means to read until the end of stream.
      *
-     * @param  newPosition  number of bytes from the beginning to the desired position.
-     * @param  count        expected number of bytes to read.
-     * @throws IOException if an I/O error occurs.
+     * @param  end  position after the last desired byte, or a value ≤ position for reading until the end of stream.
      */
-    final synchronized void position(final long newPosition, final long count) throws IOException {
-        ArgumentChecks.ensurePositive("newPosition", newPosition);
-        ArgumentChecks.ensureStrictlyPositive("count", count);
-        position = newPosition;
-        endOfInterest = newPosition + count;    // Overflow is okay here (will read until end of stream).
+    final synchronized void endOfInterest(final long end) {
+        endOfInterest = end;
     }
 
     /**
@@ -361,7 +351,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
     private Connection openConnection() throws IOException {
         long end = endOfInterest;
         if (end > position) end--;      // Make inclusive.
-        else end = Long.MAX_VALUE;
+        else end = (length > 0) ? length-1 : Long.MAX_VALUE;
         var c = openConnection(position, end);
         file.position(c.start);
         if (c.length >= 0) {
@@ -631,7 +621,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel {
      * Returns a string representation for debugging purpose.
      */
     @Override
-    public String toString() {
+    public synchronized String toString() {
         return Strings.toString(getClass(), "filename", filename(), "position", position, "rangeCount", rangesOfAvailableBytes.size());
     }
 }
diff --git a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java
index e33951c6b2..6c633f5768 100644
--- a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java
+++ b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java
@@ -155,7 +155,8 @@ public final strictfp class FileCacheByteChannelTest extends TestCase {
                     position = end;
                     end = t;
                 }
-                channel.position(position, end - position + 1);
+                channel.position(position);
+                channel.endOfInterest(end + 1);
             }
             channel.readInRandomRegion(buffer);
             while (buffer.hasRemaining()) {
@@ -174,24 +175,25 @@ public final strictfp class FileCacheByteChannelTest extends TestCase {
      */
     @Test
     public void testParseRange() {
+        final List<String> rangesUnit = List.of("bytes");
         FileCacheByteChannel.Connection c;
-        c = new FileCacheByteChannel.Connection(null, "bytes 25000-75000/100000", List.of("bytes"), OptionalLong.empty());
+        c = new FileCacheByteChannel.Connection(null, "bytes 25000-75000/100000", -1, rangesUnit);
         assertEquals( 25000, c.start);
         assertEquals( 75000, c.end);
         assertEquals(100000, c.length);
 
-        c = new FileCacheByteChannel.Connection(null, "bytes 25000-75000", List.of("bytes"), OptionalLong.empty());
+        c = new FileCacheByteChannel.Connection(null, "bytes 25000-75000", -1, rangesUnit);
         assertEquals( 25000, c.start);
         assertEquals( 75000, c.end);
         assertEquals(    -1, c.length);
 
-        c = new FileCacheByteChannel.Connection(null, "bytes 25000/100000", List.of("bytes"), OptionalLong.empty());
+        c = new FileCacheByteChannel.Connection(null, "bytes 25000/100000", -1, rangesUnit);
         assertEquals( 25000, c.start);
         assertEquals(100000, c.end);
         assertEquals(100000, c.length);
 
         // Not legal, but we test robustness.
-        c = new FileCacheByteChannel.Connection(null, "25000", List.of("bytes"), OptionalLong.empty());
+        c = new FileCacheByteChannel.Connection(null, "25000", -1, rangesUnit);
         assertEquals( 25000, c.start);
         assertEquals(    -1, c.end);
         assertEquals(    -1, c.length);