You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sis.apache.org by de...@apache.org on 2018/11/22 19:57:08 UTC
[sis] 01/02: Improve performance of Vector.repetitions(). It make a
big difference for large grids like HYCOM.
This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
commit 9f76515a09f31219c5f9451469be17a6f9b8f200
Author: Martin Desruisseaux <ma...@geomatys.com>
AuthorDate: Thu Nov 22 20:55:44 2018 +0100
Improve performance of Vector.repetitions(). It make a big difference for large grids like HYCOM.
---
.../main/java/org/apache/sis/math/ArrayVector.java | 93 ++++++++++++-
.../java/org/apache/sis/math/RepeatedVector.java | 4 +-
.../src/main/java/org/apache/sis/math/Vector.java | 149 ++++++++++++++++++---
.../org/apache/sis/math/RepeatedVectorTest.java | 54 +++++++-
4 files changed, 273 insertions(+), 27 deletions(-)
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
index 4e4e032..8936dc9 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
@@ -251,8 +251,24 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other instanceof Doubles) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final double[] cmp = ((Doubles) other).array;
+ while (lower < upper) {
+ if (Double.doubleToLongBits(array[lower++]) != Double.doubleToLongBits(cmp[otherOffset++])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override NumberRange<Double> range(final IntSupplier indices, int n) {
+ // TODO: try to paralellize with streams.
double min = Double.POSITIVE_INFINITY;
double max = Double.NEGATIVE_INFINITY;
while (--n >= 0) {
@@ -331,12 +347,27 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
}
/** Finds index of a match or mismatch (depending on {@code equality}). */
- @Override int indexOf(final int toSearch, int index, final boolean equality) {
+ @Override final int indexOf(final int toSearch, int index, final boolean equality) {
final int first = Float.floatToIntBits(array[toSearch]);
while (index < array.length && (first == Float.floatToIntBits(array[index])) != equality) index++;
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other.getClass() == getClass()) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final float[] cmp = ((Floats) other).array;
+ while (lower < upper) {
+ if (Float.floatToIntBits(array[lower++]) != Float.floatToIntBits(cmp[otherOffset++])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override final NumberRange<?> range(final IntSupplier indices, int n) {
float min = Float.POSITIVE_INFINITY;
@@ -461,6 +492,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other.getClass() == getClass()) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final long[] cmp = ((Longs) other).array;
+ while (lower < upper) {
+ if (array[lower++] != cmp[otherOffset++]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override NumberRange<?> range(final IntSupplier indices, int n) {
long min = Long.MAX_VALUE;
@@ -560,6 +606,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other.getClass() == getClass()) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final int[] cmp = ((Integers) other).array;
+ while (lower < upper) {
+ if (array[lower++] != cmp[otherOffset++]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override NumberRange<?> range(final IntSupplier indices, int n) {
int min = Integer.MAX_VALUE;
@@ -663,6 +724,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other.getClass() == getClass()) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final short[] cmp = ((Shorts) other).array;
+ while (lower < upper) {
+ if (array[lower++] != cmp[otherOffset++]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override NumberRange<?> range(final IntSupplier indices, int n) {
short min = Short.MAX_VALUE;
@@ -741,6 +817,21 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
return index;
}
+ /** Returns whether this vector in the given range is equals to the specified vector. */
+ @Override final boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ if (other.getClass() == getClass()) {
+ // TODO: replace by Arrays.equals(…) with JDK9.
+ final byte[] cmp = ((Bytes) other).array;
+ while (lower < upper) {
+ if (array[lower++] != cmp[otherOffset++]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return super.equals(lower, upper, other, otherOffset);
+ }
+
/** Finds the minimum and maximum values in the array or in a subset of the array. */
@Override NumberRange<?> range(final IntSupplier indices, int n) {
byte min = Byte.MAX_VALUE;
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
index c50fa0e..0a9b7a0 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
@@ -199,7 +199,9 @@ final class RepeatedVector extends Vector implements Serializable {
}
/**
- * Returns {@code this} since this vector is considered already compressed.
+ * Returns {@code this} since this vector is considered already compressed. Actually it may be possible to compress more
+ * if the {@linkplain #base} vector has been modified after {@code RepeatedVector} construction. But it should not happen
+ * since this vector is read-only and {@link Vector#compress(double)} recommends to not keep reference to the original vector.
*/
@Override
@SuppressWarnings("ReturnOfCollectionOrArrayField")
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
index 355318f..959b42b 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
@@ -20,6 +20,7 @@ import java.io.Serializable;
import java.util.Arrays;
import java.util.AbstractList;
import java.util.RandomAccess;
+import java.util.StringJoiner;
import java.util.function.IntSupplier;
import org.apache.sis.measure.NumberRange;
import org.apache.sis.util.Numbers;
@@ -538,7 +539,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
final int size = size();
if (size >= 2) {
/*
- * For the firt level of repetitions, we rely on a method to be overridden by subclasses
+ * For the first level of repetitions, we rely on a method to be overridden by subclasses
* for detecting the length of consecutive identical numbers. We could have use the more
* generic algorithm based on 'equals(int, int, Vector, int)' instead, but this approach
* is faster.
@@ -556,13 +557,33 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
* At this point r0 is the number of identical consecutive numbers in vectors like (x,x,x, y,y,y, z,z,z)
* and shall not be modified anymore for the rest of this method. This is the first integer value in the
* array to be returned. Following algorithm applies to deeper levels.
+ *
+ * The 'skip' variable is an optimization. Code below would work with skip = 0 all the times, but this is
+ * very slow when r0 = 1 because equals(…) is invoked for all values. Computing an amount of values that
+ * we can skip in the special case where r0 = 1 increases the speed a lot.
*/
- final int skip = (r0 == 1) ? 1 : 0; // Optimization (code below would work with skip = 0 all the times).
+ final int skip = (r0 == 1) ? indexOf(0, 1, false) : 0;
int r = 0;
nextMatch: for (;;) {
r += r0;
if (skip != 0) {
- r = indexOf(0, r, true); // Optimization for reducing the number of method calls when r0 = 1.
+ /*
+ * Optimization for reducing the number of method calls when r0 = 1: the default algorithm is to
+ * search for a position multiple of 'r0' where all values since the beginning of the vector are
+ * repeated. But if 'r0' is 1, the default algorithms perform a costly check at every positions.
+ * To avoid that, we use 'indexOf' for searching the index of the next position where a match may
+ * exist (we don't care anymore about multiples of r0 since r0 is 1). If the first expected values
+ * are constants, we use 'indexOf' again for checking efficiently those constants.
+ */
+ r = indexOf(0, r, true);
+ if (skip != 1) {
+ if (r + skip >= size) break;
+ final int end = indexOf(r, r+1, false);
+ if (end - r != skip) {
+ r = end - 1;
+ continue;
+ }
+ }
}
if (r >= size) break;
if (equals(skip, Math.min(r0, size - r), this, r + skip)) {
@@ -655,19 +676,29 @@ nextMatch: for (;;) {
* doubleValue(i) = first + increment*i
*
* The intent is that if tolerance = 0 and this method returns a non-null value, then replacing
- * this vector by an instance of SequenceVector should produce exactely the same double values.
+ * this vector by an instance of SequenceVector should produce exactly the same double values,
+ * in the limit of the accuracy allowed by the floating point values.
*/
if (type >= Numbers.FLOAT && type <= Numbers.DOUBLE) {
final double first = doubleValue(0);
final double inc = (doubleValue(--i) - first) / i;
- while (i >= 1) {
- if (!(Math.abs(first + inc*i - doubleValue(i--)) <= tolerance)) { // Use '!' for catching NaN.
- return null;
- }
- }
if (type == Numbers.FLOAT) {
+ while (i >= 1) {
+ final float value = floatValue(i);
+ final double delta = Math.abs(first + inc*i-- - value);
+ final double accur = Math.ulp(value);
+ if (!((accur > tolerance) ? (delta < accur) : (delta <= tolerance))) { // Use '!' for catching NaN.
+ return null;
+ }
+ }
final float f = (float) inc;
- if (f == inc) return f; // Use the java.lang.Float wrapper class if possible.
+ if (f == inc) return f; // Use the java.lang.Float wrapper class if possible.
+ } else {
+ while (i >= 1) {
+ if (!(Math.abs(first + inc*i - doubleValue(i--)) <= tolerance)) { // Use '!' for catching NaN.
+ return null;
+ }
+ }
}
return inc;
}
@@ -885,6 +916,17 @@ nextMatch: for (;;) {
}
return Vector.this.range(supplier, n);
}
+
+ /**
+ * If the vector can not be compressed, copies data in a new vector in order to have a smaller vector
+ * than the one backing this view. This is advantageous only on the assumption that user do not keep
+ * a reference to the original vector after {@link Vector#compress(double)} call.
+ */
+ @Override
+ public Vector compress(final double tolerance) {
+ final Vector c = super.compress(tolerance);
+ return (c != this) ? c : copy();
+ }
}
/**
@@ -1054,6 +1096,17 @@ nextMatch: for (;;) {
}, n);
}
}
+
+ /**
+ * If the vector can not be compressed, copies data in a new vector in order to have a smaller vector
+ * than the one backing this view. This is advantageous only on the assumption that user do not keep
+ * a reference to the original vector after {@link Vector#compress(double)} call.
+ */
+ @Override
+ public Vector compress(final double tolerance) {
+ final Vector c = super.compress(tolerance);
+ return (c != this) ? c : copy();
+ }
}
/**
@@ -1228,6 +1281,67 @@ nextMatch: for (;;) {
}
/**
+ * Returns a copy of this vector. The returned vector is writable, and changes in that vector has no effect
+ * on this original vector. The copy is not a clone since it may not be an instance of the same class.
+ *
+ * @return a copy of this vector.
+ */
+ final Vector copy() {
+ final Object array;
+ final int size = size();
+ switch (Numbers.getEnumConstant(getElementType())) {
+ case Numbers.DOUBLE: {
+ array = doubleValues();
+ break;
+ }
+ case Numbers.FLOAT: {
+ array = floatValues();
+ if (size != 0 && get(0) instanceof Double) {
+ return createForDecimal((float[]) array);
+ }
+ break;
+ }
+ case Numbers.LONG: {
+ final long[] data = new long[size];
+ for (int i=0; i<size; i++) {
+ data[i] = longValue(i);
+ }
+ array = data;
+ break;
+ }
+ case Numbers.INTEGER: {
+ final int[] data = new int[size];
+ for (int i=0; i<size; i++) {
+ data[i] = (int) longValue(i); // For handling both signed and unsigned integers.
+ }
+ array = data;
+ break;
+ }
+ case Numbers.SHORT: {
+ final short[] data = new short[size];
+ for (int i=0; i<size; i++) {
+ data[i] = (short) intValue(i); // For handling both signed and unsigned integers.
+ }
+ array = data;
+ break;
+ }
+ case Numbers.BYTE: {
+ final byte[] data = new byte[size];
+ for (int i=0; i<size; i++) {
+ data[i] = (byte) intValue(i); // For handling both signed and unsigned integers.
+ }
+ array = data;
+ break;
+ }
+ default: {
+ array = toArray(new Number[size]);
+ break;
+ }
+ }
+ return ArrayVector.newInstance(array, isUnsigned());
+ }
+
+ /**
* Returns a string representation of this vector.
*
* @return a string representation of this vector.
@@ -1236,17 +1350,12 @@ nextMatch: for (;;) {
*/
@Override
public String toString() {
+ final StringJoiner buffer = new StringJoiner(", ", "[", "]");
final int length = size();
- if (length == 0) {
- return "[]";
- }
- final StringBuilder buffer = new StringBuilder();
- String separator = "[";
for (int i=0; i<length; i++) {
- buffer.append(separator).append(stringValue(i));
- separator = ", ";
+ buffer.add(stringValue(i));
}
- return buffer.append(']').toString();
+ return buffer.toString();
}
/**
@@ -1306,10 +1415,8 @@ nextMatch: for (;;) {
* @param other the other vector to compare values with this vector. May be {@code this}.
* @param otherOffset index of the first element to compare in the other vector.
* @return whether values over the specified range of the two vectors are equal.
- *
- * @todo Override in {@link ArrayVector} on JDK9.
*/
- private boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+ boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
while (lower < upper) {
if (!get(lower++).equals(other.get(otherOffset++))) {
return false;
diff --git a/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
index 9f0a5e8..b4dceaf 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
@@ -19,7 +19,7 @@ package org.apache.sis.math;
import org.apache.sis.test.TestCase;
import org.junit.Test;
-import static org.opengis.test.Assert.*;
+import static org.junit.Assert.*;
/**
@@ -42,7 +42,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
15, 15, 15, 15}, false);
vec = new RepeatedVector(vec, vec.repetitions(), 0);
- assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
assertArrayEquals(new int[] {4}, vec.repetitions());
assertEquals(10, vec.intValue ( 0));
@@ -70,7 +69,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
10, 12, 15, 18}, false);
vec = new RepeatedVector(vec, vec.repetitions(), 0);
- assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
assertArrayEquals(new int[] {1,4}, vec.repetitions());
assertEquals(10, vec.intValue ( 0));
@@ -98,7 +96,6 @@ public final strictfp class RepeatedVectorTest extends TestCase {
10, 10, 10, 12, 12, 12, 15, 15, 15, 18, 18, 18}, false);
vec = new RepeatedVector(vec, vec.repetitions(), 0);
- assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
assertArrayEquals(new int[] {3,4}, vec.repetitions());
assertEquals(10, vec.intValue ( 0));
@@ -123,4 +120,53 @@ public final strictfp class RepeatedVectorTest extends TestCase {
assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
assertArrayEquals(new float[] {10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
}
+
+ /**
+ * Tests the case where values in a grid are repeated vertically with a constant prefix.
+ * This case has a special code path for performance reasons.
+ */
+ @Test
+ public void testVerticalWithConstantPrefix() {
+ testVerticalWithConstantPrefix(10);
+ testVerticalWithConstantPrefix(11);
+ }
+
+ /**
+ * Implementation of {@link #testVerticalWithConstantPrefix()} with the possibility
+ * to inject an "impurity" in the side of constant values.
+ *
+ * @param ip 10 for constant values on left side, or another value for injecting an "impurity".
+ */
+ private static void testVerticalWithConstantPrefix(final int ip) {
+ Vector vec = Vector.create(new int[] {
+ 10, 10, 10, 12, 15, 18,
+ 10, ip, 10, 12, 15, 18,
+ 10, 10, 10, 12, 15, 18}, false);
+
+ vec = new RepeatedVector(vec, vec.repetitions(), 0);
+ final int expectedBackingVectorLength = (ip == 10) ? 6 : 12;
+ assertArrayEquals(new int[] {1, expectedBackingVectorLength}, vec.repetitions());
+
+ assertEquals(10, vec.intValue ( 0));
+ assertEquals(10, vec.shortValue( 1));
+ assertEquals(10, vec.longValue ( 2));
+ assertEquals(12, vec.intValue ( 3));
+ assertEquals(15, vec.intValue ( 4));
+ assertEquals(18, vec.longValue ( 5));
+ assertEquals(10, vec.intValue ( 6));
+ assertEquals(ip, vec.intValue ( 7));
+ assertEquals(10, vec.intValue ( 8));
+ assertEquals(10, vec.intValue (13));
+ assertEquals(15, vec.shortValue(10));
+ assertEquals(12, vec.longValue (15));
+ assertEquals(18, vec.intValue (17));
+
+ Vector sub = vec.subSampling(0, 1, 6);
+ assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
+ assertArrayEquals(new float[] {10, 10, 10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+
+ sub = vec.subSampling(0, 1, 12);
+ assertArrayEquals(new float[] {10, 10, 10, 12, 15, 18,
+ 10, ip, 10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+ }
}