You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sis.apache.org by de...@apache.org on 2018/11/22 19:57:08 UTC

[sis] 01/02: Improve performance of Vector.repetitions(). It make a big difference for large grids like HYCOM.

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 9f76515a09f31219c5f9451469be17a6f9b8f200
Author: Martin Desruisseaux <ma...@geomatys.com>
AuthorDate: Thu Nov 22 20:55:44 2018 +0100

    Improve performance of Vector.repetitions(). It make a big difference for large grids like HYCOM.
---
 .../main/java/org/apache/sis/math/ArrayVector.java |  93 ++++++++++++-
 .../java/org/apache/sis/math/RepeatedVector.java   |   4 +-
 .../src/main/java/org/apache/sis/math/Vector.java  | 149 ++++++++++++++++++---
 .../org/apache/sis/math/RepeatedVectorTest.java    |  54 +++++++-
 4 files changed, 273 insertions(+), 27 deletions(-)

diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
index 4e4e032..8936dc9 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
@@ -251,8 +251,24 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other instanceof Doubles) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final double[] cmp = ((Doubles) other).array;
+                while (lower < upper) {
+                    if (Double.doubleToLongBits(array[lower++]) != Double.doubleToLongBits(cmp[otherOffset++])) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<Double> range(final IntSupplier indices, int n) {
+            // TODO: try to paralellize with streams.
             double min = Double.POSITIVE_INFINITY;
             double max = Double.NEGATIVE_INFINITY;
             while (--n >= 0) {
@@ -331,12 +347,27 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
         }
 
         /** Finds index of a match or mismatch (depending on {@code equality}). */
-        @Override int indexOf(final int toSearch, int index, final boolean equality) {
+        @Override final int indexOf(final int toSearch, int index, final boolean equality) {
             final int first = Float.floatToIntBits(array[toSearch]);
             while (index < array.length && (first == Float.floatToIntBits(array[index])) != equality) index++;
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other.getClass() == getClass()) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final float[] cmp = ((Floats) other).array;
+                while (lower < upper) {
+                    if (Float.floatToIntBits(array[lower++]) != Float.floatToIntBits(cmp[otherOffset++])) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override final NumberRange<?> range(final IntSupplier indices, int n) {
             float min = Float.POSITIVE_INFINITY;
@@ -461,6 +492,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other.getClass() == getClass()) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final long[] cmp = ((Longs) other).array;
+                while (lower < upper) {
+                    if (array[lower++] != cmp[otherOffset++]) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             long min = Long.MAX_VALUE;
@@ -560,6 +606,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other.getClass() == getClass()) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final int[] cmp = ((Integers) other).array;
+                while (lower < upper) {
+                    if (array[lower++] != cmp[otherOffset++]) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             int min = Integer.MAX_VALUE;
@@ -663,6 +724,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other.getClass() == getClass()) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final short[] cmp = ((Shorts) other).array;
+                while (lower < upper) {
+                    if (array[lower++] != cmp[otherOffset++]) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             short min = Short.MAX_VALUE;
@@ -741,6 +817,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return index;
         }
 
+        /** Returns whether this vector in the given range is equals to the specified vector. */
+        @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+            if (other.getClass() == getClass()) {
+                // TODO: replace by Arrays.equals(…) with JDK9.
+                final byte[] cmp = ((Bytes) other).array;
+                while (lower < upper) {
+                    if (array[lower++] != cmp[otherOffset++]) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            return super.equals(lower, upper, other, otherOffset);
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             byte min = Byte.MAX_VALUE;
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
index c50fa0e..0a9b7a0 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
@@ -199,7 +199,9 @@ final class RepeatedVector extends Vector implements Serializable {
     }
 
     /**
-     * Returns {@code this} since this vector is considered already compressed.
+     * Returns {@code this} since this vector is considered already compressed. Actually it may be possible to compress more
+     * if the {@linkplain #base} vector has been modified after {@code RepeatedVector} construction. But it should not happen
+     * since this vector is read-only and {@link Vector#compress(double)} recommends to not keep reference to the original vector.
      */
     @Override
     @SuppressWarnings("ReturnOfCollectionOrArrayField")
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
index 355318f..959b42b 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
@@ -20,6 +20,7 @@ import java.io.Serializable;
 import java.util.Arrays;
 import java.util.AbstractList;
 import java.util.RandomAccess;
+import java.util.StringJoiner;
 import java.util.function.IntSupplier;
 import org.apache.sis.measure.NumberRange;
 import org.apache.sis.util.Numbers;
@@ -538,7 +539,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
         final int size = size();
         if (size >= 2) {
             /*
-             * For the firt level of repetitions, we rely on a method to be overridden by subclasses
+             * For the first level of repetitions, we rely on a method to be overridden by subclasses
              * for detecting the length of consecutive identical numbers. We could have use the more
              * generic algorithm based on 'equals(int, int, Vector, int)' instead, but this approach
              * is faster.
@@ -556,13 +557,33 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
              * At this point r0 is the number of identical consecutive numbers in vectors like (x,x,x, y,y,y, z,z,z)
              * and shall not be modified anymore for the rest of this method. This is the first integer value in the
              * array to be returned. Following algorithm applies to deeper levels.
+             *
+             * The 'skip' variable is an optimization. Code below would work with skip = 0 all the times, but this is
+             * very slow when r0 = 1 because equals(…) is invoked for all values.  Computing an amount of values that
+             * we can skip in the special case where r0 = 1 increases the speed a lot.
              */
-            final int skip = (r0 == 1) ? 1 : 0;     // Optimization (code below would work with skip = 0 all the times).
+            final int skip = (r0 == 1) ? indexOf(0, 1, false) : 0;
             int r = 0;
 nextMatch:  for (;;) {
                 r += r0;
                 if (skip != 0) {
-                    r = indexOf(0, r, true);        // Optimization for reducing the number of method calls when r0 = 1.
+                    /*
+                     * Optimization for reducing the number of method calls when r0 = 1: the default algorithm is to
+                     * search for a position multiple of 'r0' where all values since the beginning of the vector are
+                     * repeated. But if 'r0' is 1, the default algorithms perform a costly check at every positions.
+                     * To avoid that, we use 'indexOf' for searching the index of the next position where a match may
+                     * exist (we don't care anymore about multiples of r0 since r0 is 1). If the first expected values
+                     * are constants, we use 'indexOf' again for checking efficiently those constants.
+                     */
+                    r = indexOf(0, r, true);
+                    if (skip != 1) {
+                        if (r + skip >= size) break;
+                        final int end = indexOf(r, r+1, false);
+                        if (end - r != skip) {
+                            r = end - 1;
+                            continue;
+                        }
+                    }
                 }
                 if (r >= size) break;
                 if (equals(skip, Math.min(r0, size - r), this, r + skip)) {
@@ -655,19 +676,29 @@ nextMatch:  for (;;) {
              *     doubleValue(i)  =  first + increment*i
              *
              * The intent is that if tolerance = 0 and this method returns a non-null value, then replacing
-             * this vector by an instance of SequenceVector should produce exactely the same double values.
+             * this vector by an instance of SequenceVector should produce exactly the same double values,
+             * in the limit of the accuracy allowed by the floating point values.
              */
             if (type >= Numbers.FLOAT && type <= Numbers.DOUBLE) {
                 final double first = doubleValue(0);
                 final double inc = (doubleValue(--i) - first) / i;
-                while (i >= 1) {
-                    if (!(Math.abs(first + inc*i - doubleValue(i--)) <= tolerance)) {       // Use '!' for catching NaN.
-                        return null;
-                    }
-                }
                 if (type == Numbers.FLOAT) {
+                    while (i >= 1) {
+                        final float  value = floatValue(i);
+                        final double delta = Math.abs(first + inc*i-- - value);
+                        final double accur = Math.ulp(value);
+                        if (!((accur > tolerance) ? (delta < accur) : (delta <= tolerance))) {  // Use '!' for catching NaN.
+                            return null;
+                        }
+                    }
                     final float f = (float) inc;
-                    if (f == inc) return f;             // Use the java.lang.Float wrapper class if possible.
+                    if (f == inc) return f;                            // Use the java.lang.Float wrapper class if possible.
+                } else {
+                    while (i >= 1) {
+                        if (!(Math.abs(first + inc*i - doubleValue(i--)) <= tolerance)) {       // Use '!' for catching NaN.
+                            return null;
+                        }
+                    }
                 }
                 return inc;
             }
@@ -885,6 +916,17 @@ nextMatch:  for (;;) {
             }
             return Vector.this.range(supplier, n);
         }
+
+        /**
+         * If the vector can not be compressed, copies data in a new vector in order to have a smaller vector
+         * than the one backing this view. This is advantageous only on the assumption that user do not keep
+         * a reference to the original vector after {@link Vector#compress(double)} call.
+         */
+        @Override
+        public Vector compress(final double tolerance) {
+            final Vector c = super.compress(tolerance);
+            return (c != this) ? c : copy();
+        }
     }
 
     /**
@@ -1054,6 +1096,17 @@ nextMatch:  for (;;) {
                 }, n);
             }
         }
+
+        /**
+         * If the vector can not be compressed, copies data in a new vector in order to have a smaller vector
+         * than the one backing this view. This is advantageous only on the assumption that user do not keep
+         * a reference to the original vector after {@link Vector#compress(double)} call.
+         */
+        @Override
+        public Vector compress(final double tolerance) {
+            final Vector c = super.compress(tolerance);
+            return (c != this) ? c : copy();
+        }
     }
 
     /**
@@ -1228,6 +1281,67 @@ nextMatch:  for (;;) {
     }
 
     /**
+     * Returns a copy of this vector. The returned vector is writable, and changes in that vector has no effect
+     * on this original vector. The copy is not a clone since it may not be an instance of the same class.
+     *
+     * @return a copy of this vector.
+     */
+    final Vector copy() {
+        final Object array;
+        final int size = size();
+        switch (Numbers.getEnumConstant(getElementType())) {
+            case Numbers.DOUBLE: {
+                array = doubleValues();
+                break;
+            }
+            case Numbers.FLOAT: {
+                array = floatValues();
+                if (size != 0 && get(0) instanceof Double) {
+                    return createForDecimal((float[]) array);
+                }
+                break;
+            }
+            case Numbers.LONG: {
+                final long[] data = new long[size];
+                for (int i=0; i<size; i++) {
+                    data[i] = longValue(i);
+                }
+                array = data;
+                break;
+            }
+            case Numbers.INTEGER: {
+                final int[] data = new int[size];
+                for (int i=0; i<size; i++) {
+                    data[i] = (int) longValue(i);           // For handling both signed and unsigned integers.
+                }
+                array = data;
+                break;
+            }
+            case Numbers.SHORT: {
+                final short[] data = new short[size];
+                for (int i=0; i<size; i++) {
+                    data[i] = (short) intValue(i);          // For handling both signed and unsigned integers.
+                }
+                array = data;
+                break;
+            }
+            case Numbers.BYTE: {
+                final byte[] data = new byte[size];
+                for (int i=0; i<size; i++) {
+                    data[i] = (byte) intValue(i);           // For handling both signed and unsigned integers.
+                }
+                array = data;
+                break;
+            }
+            default: {
+                array = toArray(new Number[size]);
+                break;
+            }
+        }
+        return ArrayVector.newInstance(array, isUnsigned());
+    }
+
+    /**
      * Returns a string representation of this vector.
      *
      * @return a string representation of this vector.
@@ -1236,17 +1350,12 @@ nextMatch:  for (;;) {
      */
     @Override
     public String toString() {
+        final StringJoiner buffer = new StringJoiner(", ", "[", "]");
         final int length = size();
-        if (length == 0) {
-            return "[]";
-        }
-        final StringBuilder buffer = new StringBuilder();
-        String separator = "[";
         for (int i=0; i<length; i++) {
-            buffer.append(separator).append(stringValue(i));
-            separator = ", ";
+            buffer.add(stringValue(i));
         }
-        return buffer.append(']').toString();
+        return buffer.toString();
     }
 
     /**
@@ -1306,10 +1415,8 @@ nextMatch:  for (;;) {
      * @param  other        the other vector to compare values with this vector. May be {@code this}.
      * @param  otherOffset  index of the first element to compare in the other vector.
      * @return whether values over the specified range of the two vectors are equal.
-     *
-     * @todo Override in {@link ArrayVector} on JDK9.
      */
-    private boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+    boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
         while (lower < upper) {
             if (!get(lower++).equals(other.get(otherOffset++))) {
                 return false;
diff --git a/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
index 9f0a5e8..b4dceaf 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
@@ -19,7 +19,7 @@ package org.apache.sis.math;
 import org.apache.sis.test.TestCase;
 import org.junit.Test;
 
-import static org.opengis.test.Assert.*;
+import static org.junit.Assert.*;
 
 
 /**
@@ -42,7 +42,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
                 15, 15, 15, 15}, false);
 
         vec = new RepeatedVector(vec, vec.repetitions(), 0);
-        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
         assertArrayEquals(new int[] {4}, vec.repetitions());
 
         assertEquals(10, vec.intValue  ( 0));
@@ -70,7 +69,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
                 10, 12, 15, 18}, false);
 
         vec = new RepeatedVector(vec, vec.repetitions(), 0);
-        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
         assertArrayEquals(new int[] {1,4}, vec.repetitions());
 
         assertEquals(10, vec.intValue  ( 0));
@@ -98,7 +96,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
                 10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18}, false);
 
         vec = new RepeatedVector(vec, vec.repetitions(), 0);
-        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
         assertArrayEquals(new int[] {3,4}, vec.repetitions());
 
         assertEquals(10, vec.intValue  ( 0));
@@ -123,4 +120,53 @@ public final strictfp class RepeatedVectorTest extends TestCase {
         assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
         assertArrayEquals(new float[] {10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
     }
+
+    /**
+     * Tests the case where values in a grid are repeated vertically with a constant prefix.
+     * This case has a special code path for performance reasons.
+     */
+    @Test
+    public void testVerticalWithConstantPrefix() {
+        testVerticalWithConstantPrefix(10);
+        testVerticalWithConstantPrefix(11);
+    }
+
+    /**
+     * Implementation of {@link #testVerticalWithConstantPrefix()} with the possibility
+     * to inject an "impurity" in the side of constant values.
+     *
+     * @param  ip  10 for constant values on left side, or another value for injecting an "impurity".
+     */
+    private static void testVerticalWithConstantPrefix(final int ip) {
+        Vector vec = Vector.create(new int[] {
+                10, 10, 10,  12, 15, 18,
+                10, ip, 10,  12, 15, 18,
+                10, 10, 10,  12, 15, 18}, false);
+
+        vec = new RepeatedVector(vec, vec.repetitions(), 0);
+        final int expectedBackingVectorLength = (ip == 10) ? 6 : 12;
+        assertArrayEquals(new int[] {1, expectedBackingVectorLength}, vec.repetitions());
+
+        assertEquals(10, vec.intValue  ( 0));
+        assertEquals(10, vec.shortValue( 1));
+        assertEquals(10, vec.longValue ( 2));
+        assertEquals(12, vec.intValue  ( 3));
+        assertEquals(15, vec.intValue  ( 4));
+        assertEquals(18, vec.longValue ( 5));
+        assertEquals(10, vec.intValue  ( 6));
+        assertEquals(ip, vec.intValue  ( 7));
+        assertEquals(10, vec.intValue  ( 8));
+        assertEquals(10, vec.intValue  (13));
+        assertEquals(15, vec.shortValue(10));
+        assertEquals(12, vec.longValue (15));
+        assertEquals(18, vec.intValue  (17));
+
+        Vector sub = vec.subSampling(0, 1, 6);
+        assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
+        assertArrayEquals(new float[] {10, 10, 10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+
+        sub = vec.subSampling(0, 1, 12);
+        assertArrayEquals(new float[] {10, 10, 10, 12, 15, 18,
+                                       10, ip, 10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+    }
 }