You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2023/09/04 10:55:11 UTC
[commons-numbers] branch master updated: NUMBERS-202: Add DD to the user guide
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-numbers.git
The following commit(s) were added to refs/heads/master by this push:
new 589712db NUMBERS-202: Add DD to the user guide
589712db is described below
commit 589712db0e6024ff888908f4f59f6365ed35526b
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Wed Aug 30 22:13:20 2023 +0100
NUMBERS-202: Add DD to the user guide
---
.../java/org/apache/commons/numbers/core/DD.java | 2 +-
.../apache/commons/numbers/core/UserGuideTest.java | 98 +++++++-
src/site/apt/userguide/index.apt | 262 ++++++++++++++++++++-
3 files changed, 359 insertions(+), 3 deletions(-)
diff --git a/commons-numbers-core/src/main/java/org/apache/commons/numbers/core/DD.java b/commons-numbers-core/src/main/java/org/apache/commons/numbers/core/DD.java
index 039f8e83..08326617 100644
--- a/commons-numbers-core/src/main/java/org/apache/commons/numbers/core/DD.java
+++ b/commons-numbers-core/src/main/java/org/apache/commons/numbers/core/DD.java
@@ -75,7 +75,7 @@ import java.util.function.DoubleUnaryOperator;
*
* <p>It is not possible to directly specify the two parts of the number.
* The two parts must be added using {@link #ofSum(double, double) ofSum}.
- * If the two parts already represent a number such {@code (x, xx)} such that {@code x == x + xx}
+ * If the two parts already represent a number {@code (x, xx)} such that {@code x == x + xx}
* then the magnitudes of the parts will be unchanged; any signed zeros may be subject to a sign
* change.
*
diff --git a/commons-numbers-core/src/test/java/org/apache/commons/numbers/core/UserGuideTest.java b/commons-numbers-core/src/test/java/org/apache/commons/numbers/core/UserGuideTest.java
index 10f4d6e2..cca40013 100644
--- a/commons-numbers-core/src/test/java/org/apache/commons/numbers/core/UserGuideTest.java
+++ b/commons-numbers-core/src/test/java/org/apache/commons/numbers/core/UserGuideTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.numbers.core;
+import java.math.BigDecimal;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -74,7 +75,6 @@ class UserGuideTest {
Assertions.assertEquals(-1.0, x2);
}
-
@Test
void testPrecision1() {
// Default allows no numbers between
@@ -140,4 +140,100 @@ class UserGuideTest {
Assertions.assertEquals(0.10000000000000009, Precision.representableDelta(1.0, 0.1));
}
+
+ @Test
+ void testDD1() {
+ double x = Math.PI;
+ int y = 42;
+ long z = -8564728970587006436L;
+ Assertions.assertEquals(x, DD.of(x).doubleValue());
+ Assertions.assertEquals(y, DD.of(y).intValue());
+ Assertions.assertEquals(z, DD.of(z).longValue());
+ Assertions.assertNotEquals(z, (long) (double) z);
+ }
+
+ @Test
+ void testDD2() {
+ BigDecimal pi = new BigDecimal("3.14159265358979323846264338327950288419716939937510");
+ DD x = DD.from(pi);
+ Assertions.assertEquals("(3.141592653589793,1.2246467991473532E-16)", x.toString());
+ Assertions.assertNotEquals(0, pi.compareTo(x.bigDecimalValue()));
+ Assertions.assertEquals(Math.PI, x.hi());
+ Assertions.assertEquals(pi.subtract(new BigDecimal(Math.PI)).doubleValue(), x.lo());
+
+ DD nan = DD.of(Double.NaN);
+ Assertions.assertFalse(nan.isFinite());
+ Assertions.assertThrows(NumberFormatException.class, () -> nan.bigDecimalValue());
+ }
+
+ @Test
+ void testDD3() {
+ long x = -8564728970587006436L;
+ Assertions.assertNotEquals(x + 1, DD.ONE.add(x).longValue());
+ Assertions.assertEquals(x + 1, DD.ONE.add(DD.of(x)).longValue());
+ }
+
+ @Test
+ void testDD4() {
+ double a = 1.2345678901234567;
+ double b = 123.45678901234567;
+ DD w = DD.ofProduct(a, b);
+ DD x = DD.ofSum(a, b);
+ DD y = DD.ofDifference(a, b);
+ DD z = DD.fromQuotient(1, 3);
+ Assertions.assertEquals("(152.41578753238835,-1.0325951435749745E-14)", w.toString());
+ Assertions.assertEquals("(124.69135690246912,-1.1102230246251565E-15)", x.toString());
+ Assertions.assertEquals("(-122.22222112222221,-1.1102230246251565E-15)", y.toString());
+ Assertions.assertEquals("(0.3333333333333333,1.850371707708594E-17)", z.toString());
+ Assertions.assertEquals(a * b, w.hi());
+ Assertions.assertEquals(a + b, x.hi());
+ Assertions.assertEquals(a - b, y.hi());
+ Assertions.assertEquals(1.0 / 3, z.hi());
+
+ DD zz = DD.of(1).divide(DD.of(3));
+ Assertions.assertEquals(z, zz);
+ }
+
+ @Test
+ void testDD5() {
+ Assertions.assertEquals(0.9999999999999999, 1.0 / 2 + 1.0 / 3 + 1.0 / 6);
+ DD z = DD.fromQuotient(1, 2)
+ .add(DD.fromQuotient(1, 3))
+ .add(DD.fromQuotient(1, 6));
+ Assertions.assertEquals("(1.0,-4.622231866529366E-33)", z.toString());
+ Assertions.assertEquals(1.0, z.doubleValue());
+ }
+
+ @Test
+ void testDD6() {
+ double a = 1;
+ double b = Math.pow(2, 53);
+ double c = Math.pow(2, 106);
+ DD z = DD.of(a).add(b).add(c).subtract(c).subtract(b);
+ Assertions.assertEquals(0.0, z.doubleValue());
+ }
+
+ @Test
+ void testDD7() {
+ double a = 1.5 * Math.pow(2, 1023);
+ double b = 4 * Math.pow(2, -1022);
+ DD x = DD.of(a);
+ DD y = DD.of(b);
+ Assertions.assertFalse(x.multiply(y).isFinite());
+
+ // Create fractional representation as [0.5, 1) * 2^b
+ int[] xb = {0};
+ int[] yb = {0};
+ x = x.frexp(xb); // (0.75, 0) * 2^1024
+ y = y.frexp(yb); // (0.5, 0) * 2^-1019
+ Assertions.assertEquals(0.75, x.doubleValue());
+ Assertions.assertEquals(0.5, y.doubleValue());
+ Assertions.assertEquals(1024, xb[0]);
+ Assertions.assertEquals(-1019, yb[0]);
+
+ DD z = x.multiply(y); // (0.375, 0)
+ Assertions.assertEquals(0.375, z.doubleValue());
+ // Rescale by 2^5
+ Assertions.assertEquals(a * b, z.scalb(xb[0] + yb[0]).doubleValue());
+ }
}
diff --git a/src/site/apt/userguide/index.apt b/src/site/apt/userguide/index.apt
index 8cf82a4d..3814faa1 100644
--- a/src/site/apt/userguide/index.apt
+++ b/src/site/apt/userguide/index.apt
@@ -35,6 +35,8 @@ The Apache Commons Numbers User Guide
* {{{Precision}Precision}}
+ * {{{DD:_double-double}DD: double-double}}
+
* {{{Field}Field}}
* {{{Fraction}Fraction}}
@@ -428,7 +430,7 @@ double x2 = Sum.of(1e100, 1, -2, -1e100).getAsDouble();
is therefore not a full <<<double-double>>> implementation, which would maintain the sum
as the current total and a round-off term. This difference makes the <<<Sum>>> class faster
at the cost of some accuracy during addition of terms that cancel.
-
+
If the terms to be subtracted are known then the summation can be split into the positive
and negative terms, summed in two parts and the result computed by a final subtraction of
the <<<Sum>>> of negative parts.
@@ -548,6 +550,264 @@ Precision.round(678.125, -2) // 700.0
Precision.representableDelta(1.0, 0.1) // 0.10000000000000009
+------------------------------------------+
+* DD: double-double
+
+ The <<<DD>>> class provides an extended precision floating-point number.
+ A double-double is an unevaluated sum of two IEEE <<<double>>> precision numbers capable of
+ representing at least 106 bits of significand. A normalized double-double number \( (x, xx) \)
+ satisfies the condition that the parts are non-overlapping in magnitude such that:
+
++------------------------------------------+
+|x| > |xx|
+x == x + xx
++------------------------------------------+
+
+ The implementation assumes a normalized representation during operations on a <<<DD>>>
+ number and computes results as a normalized representation. The number is limited to the
+ same exponent range as a <<<double>>>. Thus the number can be considered
+ as a standard IEEE <<<double>>> with additional information that would be lost to rounding.
+ This information can be used in arithmetic operations so that compound calculation can
+ reduce the error present in the final result returned as a <<<double>>>.
+
+ The number \( (x, xx) \) may also be referred to using the labels <high> and <low> to indicate
+ the magnitude of the parts as \( ( x_\{hi\}, x_\{lo\} ) \), or using a numerical suffix
+ for the parts as \( ( x_0, x_1 ) \). The numerical suffix is typically used when the
+ extended floating-point number has an arbitrary number of parts, for example a quad-double
+ is \( ( x_0, x_1, x_2, x_3 ) \). The parts of the <<<DD>>> can be accessed using the <<<hi()>>>
+ and <<<lo()>>> methods and the evaluated sum \( x + xx \) as <<<doubleValue()>>>.
+
+ The <<<DD>>> class is immutable. Instances are constructed with factory methods.
+ A <<<DD>>> can be constructed from a <<<double>>>, <<<int>>> or <<<long>>> primitive. In the
+ case of a <<<long>>> this maintains the full 64-bits of information; this is not possible
+ when using implicit type conversion to a <<<double>>>. These conversions can be reversed
+ as the <<<DD>>> provides type conversions using the <<<java.lang.Number>>> methods.
+
++------------------------------------------+
+double x = Math.PI;
+int y = 42;
+long z = -8564728970587006436L;
+DD.of(x).doubleValue() // = x
+DD.of(y).intValue() // = y
+DD.of(z).longValue() // = z
+ // (long) (double) z != z
++------------------------------------------+
+
+ The <<<DD>>> class can be created as the result of a <<<double>>> operation and the round-off
+ error. In the case of addition and multiplication the result is exact. For division the result
+ will have the closest 106-bit representation to the exact result.
+ Note that use of the factory constructors for <<<double>>> arithmetic is preferred over
+ constructing the arguments as a <<<DD>>> from a <<<double>>> and performing the equivalent
+ operation in <<<DD>>> arithmetic:
+
++------------------------------------------+
+double a = 1.2345678901234567;
+double b = 123.45678901234567;
+DD w = DD.ofProduct(a, b); // (152.41578753238835,-1.0325951435749745E-14)
+DD x = DD.ofSum(a, b); // (124.69135690246912,-1.1102230246251565E-15)
+DD y = DD.ofDifference(a, b); // (-122.22222112222221,-1.1102230246251565E-15)
+DD z = DD.fromQuotient(1, 3); // (0.3333333333333333,1.850371707708594E-17)
+// w.hi() = a * b
+// x.hi() = a + b
+// y.hi() = a - b
+// z.hi() = 1.0 / 3
+
+// Inefficient
+DD zz = DD.of(1).divide(DD.of(3));
+z.equals(zz) // true
++------------------------------------------+
+
+ The <<<DD>>> can also be converted from and to a <<<BigDecimal>>>. The conversion <<from>> may
+ lose precision. The conversion <<to>> is exact but only supported for finite numbers as BigDecimal
+ does not support infinity and NaN. The method <<<isFinite()>>> can be used to verify that the
+ evaluated sum \( x + xx \) is finite; this is only true when both parts are finite and conversion
+ to <<<BigDecimal>>> is possible. Note that <<<BigDecimal>>> can be used for numerical equality
+ and ranking operations as the <<<DD>>> class does not support <<<java.lang.Comparable>>>, and
+ the <<<equals(Object)>>> method is true for <<binary>> equality of the parts, not <<numerical>>
+ equality of their evaluated sum.
+ The following demonstrates a round trip of \( \pi \) to 50 decimal places. The <<<DD>>> is
+ limited to approximately 34 decimal places and this is split into
+ the <<<double>>> value for \( \pi \) and a <<<double>>> representing the rounding error.
+
++------------------------------------------+
+BigDecimal pi = new BigDecimal("3.14159265358979323846264338327950288419716939937510");
+DD x = DD.from(pi); // (3.141592653589793,1.2246467991473532E-16)
+pi.compareTo(x.bigDecimalValue()) // != 0
+// x.hi() = Math.PI
+// x.lo() = pi.subtract(new BigDecimal(Math.PI)).doubleValue()
+
+DD nan = DD.of(Double.NaN);
+nan.isFinite() // false
+nan.bigDecimalValue() // throws NumberFormatException
++------------------------------------------+
+
+ Note: It is not possible to directly specify the two parts of the <<<DD>>> number.
+ The two parts must be added using a sum to ensure the <<<DD>>> maintains a normalized
+ representation. If the two parts already represent a number
+ \( (x, xx) \) such that \( x == x + xx \) then the magnitudes of the parts will be
+ unchanged; any signed zeros may be subject to a sign change.
+
+ The <<<DD>>> class provides the arithmetic operations add, subtract, multiply, and divide for a
+ <<<double>>> or <<<DD>>> argument. <<<int>>> and <<<long>>> arguments can be used as a
+ <<<double>>> due to implicit type conversion. However if the <<<long>>> value has more than
+ 53-bits of precision then it should first be converted to a <<<DD>>>.
+
++------------------------------------------+
+long x = -8564728970587006436L;
+DD.ONE.add(x).longValue() // != x + 1
+DD.ONE.add(DD.of(x)).longValue() // == x + 1
++------------------------------------------+
+
+ The arithmetic methods are not intended to provide an exactly rounded
+ result to 106-bit precision. A compromise has been made between accuracy and performance so
+ that the <<<DD>>> class is a viable alternative to using <<<BigDecimal>>> for high accuracy
+ arithmetic when the ultimate result is required as a <<<double>>>. The approximate error bounds
+ of operations are supplied in the class javadoc. Typical errors bounds are within a relative error
+ of 1-4 <eps> of the exact result where <eps> is \( 2^\{-106\} \).
+
+ The following demonstrates a simple sum where the <<<double>>> representation of a fraction is
+ inexact and results in a rounding error. This is removed by extending the fraction to a
+ double-double representation:
+
++------------------------------------------+
+1.0 / 2 + 1.0 / 3 + 1.0 / 6 // = 0.9999999999999999
+DD z = DD.fromQuotient(1, 2)
+ .add(DD.fromQuotient(1, 3))
+ .add(DD.fromQuotient(1, 6)); // (1.0,-4.622231866529366E-33)
+z.doubleValue() // = 1.0
++------------------------------------------+
+
+ Note that the error on the final result is influenced by the calculation.
+ For summation of terms of the same sign a very large number of operations would be required to
+ observe a 1 ULP error in the final <<<double>>> result. If terms of the opposite sign are
+ summed then smaller magnitude intermediate terms can be lost due to the limited 106-bit
+ precision. In this case almost total cancellation will product the incorrect result.
+
++------------------------------------------+
+double a = 1;
+double b = Math.pow(2, 53);
+double c = Math.pow(2, 106);
+DD.of(a).add(b).add(c).subtract(c).subtract(b) // (0, 0)
+ // NOT (1, 0)
++------------------------------------------+
+
+ When using products the compound error will accumulate faster but is not influenced by the
+ sign of the operands. A power function <<<pow(int)>>> is provided for compound multiplication
+ of the same argument that is more efficient but does not reduce the error bound.
+
+ The <<<DDMath>>> class provides special support for operations using the <<<DD>>> class.
+ The purpose is to supplement the arithmetic operations in the <<<DD>>> class providing
+ greater accuracy at the cost of performance.
+ This includes a power function that returns the result in a fractional representation
+ to avoid over or underflow, with a lower error bound that the equivalent function in <<<DD>>>.
+
+** Overflow and underflow
+
+ A double-double number is limited to the same finite range as a <<<double>>>.
+ The class is intended for use when the ultimate result is finite and intermediate values
+ do not approach infinity or zero.
+
+ This implementation does not support IEEE standards for handling infinite and NaN when used
+ in arithmetic operations. Computations may split a 64-bit <<<double>>> into two parts and/or use
+ subtraction of intermediate terms to compute round-off parts. These operations may generate
+ infinite values due to overflow which then propagate through further operations to NaN,
+ for example computing the round-off using <<<Inf - Inf = NaN>>>.
+
+ Operations that involve splitting a double (multiply, divide) are safe
+ when the base 2 exponent is below 996. This puts an upper limit of approximately +/-6.7e299 on
+ any values to be split; in practice the arguments to multiply and divide operations are further
+ constrained by the expected finite value of the product or quotient.
+
+ Likewise the smallest value that can be represented is \( 2^\{-1074\} \). The full
+ 106-bit accuracy will be lost when intermediates are within \( 2^\{53\} \) of the minimum
+ normalized <<<double>>> \( 2^\{-1022\} \).
+
+ The <<<DD>>> result can be verified by checking it is a finite evaluated sum using
+ <<<isFinite()>>>. Computations expecting to approach over or underflow must use scaling of
+ intermediate terms using <<<frexp(int[])>>> and <<<scalb(int)>>> and
+ appropriate management of the current base 2 scale. The <<<frexp(int[])>>> method creates
+ a number with a fractional part \( f \) in the range \( [0.5, 1) \) and an exponent part
+ \( 2^b \) such that \( x = f \times 2^b \). The following example demonstrates scaling and
+ rescaling:
+
++------------------------------------------+
+double a = 1.5 * Math.pow(2, 1023);
+double b = 4 * Math.pow(2, -1022);
+DD x = DD.of(a);
+DD y = DD.of(b);
+
+// Values too extreme for DD arithmetic!
+x.multiply(y).isFinite() // false
+
+// Create fractional representation as [0.5, 1) * 2^b
+int[] xb = {0};
+int[] yb = {0};
+x = x.frexp(xb); // (0.75, 0) * 2^1024
+y = y.frexp(yb); // (0.5, 0) * 2^-1019
+
+DD z = x.multiply(y); // (0.375, 0)
+// Rescale by 2^5
+z.scalb(xb[0] + yb[0]).doubleValue() // = a * b
++------------------------------------------+
+
+ This simple example shows the added complexity in avoiding under and overflow. Some
+ common operations have been implemented in the <<<Sum>>> and <<<Norm>>> classes with
+ extended precision floating-point arithmetic. These use appropriate scaling and
+ ensure the correct IEEE result is returned in the event of a non-finite result.
+ Algorithms include summation, sum-of-products and Euclidean norms.
+
+** Comparison to Math.fma
+
+ The fused multiply-add method <<<Math.fma(double, double, double)>>> added in Java 9 returns
+ the exact product of the first two arguments summed with the third argument and then rounded
+ once to the nearest <<<double>>>. This avoids the two rounding operations that would occur
+ using <<<a * b + c>>> as a regular floating-point expression. The result is returned
+ as a <<<double>>>.
+
+ The <<<DD>>> class can represent the exact 106-bit product <<<a * b>>> as \( (x, xx) \).
+ Unlike <<<Math.fma>>>, which uses effectively unlimited precision arithmetic, the addend
+ <<<c>>> can only be used if it overlaps either the upper part \( x \) or the lower part
+ \( xx \) of the number. If too small then it will not change \(x, xx \); if too large then
+ the final result will be \( (c, x) \).
+ However the result will be returned with information that can be used in further computation.
+ Also note that <<<Math.fma>>> will compute the correct IEEE result if the value is non-finite.
+ It will also be protected from over and underflow for the intermediate result, for example:
+
++------------------------------------------+
+Math.fma(Double.MAX_VALUE, 2.0, -Double.MAX_VALUE) // = Double.MAX_VALUE
+Math.fma(Double.MIN_VALUE, 0.5, Double.MIN_VALUE) // = Double.MIN_VALUE * 1.5
++------------------------------------------+
+
+ The round-off from a standard floating-point expression <<<x * y>>> can be computed using
+ FMA:
+
++------------------------------------------+
+double x = ...
+double y = ...
+DD z = DD.ofProduct(x, y);
+z.hi() // = x * y;
+z.lo() // = Math.fma(x, y, -x * y)
++------------------------------------------+
+
+ This optimisation is not available on Java 8. The <<<DD>>> class uses standard <<<double>>>
+ arithmetic to split the arguments into parts \( x = x_h + x_l \) and \( y = y_h + y_l \),
+ each of which can be multiplied exactly by <<<double>>> arithmetic and used to compute
+ the round-off component. Splitting and computation of the round-off takes 16 FLOPS which is
+ far slower than a hardware supported FMA operation.
+
+** Application of DD
+
+ <<<DD>>> arithmetic is slower than <<<double>>> arithmetic and faster than <<<BigDecimal>>>.
+ The class has been implemented to avoid costly branch conditions in operations, such as those
+ required to support IEEE arithmetic for non-finite values, and avoid significant extra
+ computation for a gain of only 1 or 2 ULP accuracy in the lower part of the double-double number.
+ Users should assess the benefits of using standard <<<double>>> arithmetic, with or without
+ <<<Math.fma>>>, against <<<DD>>> or <<<BigDecimal>>> arithmetic with suitable accuracy and
+ performance benchmark data. For example the <<<DD>>> class is used in <<<Commons Numbers>>> and
+ {{{https://commons.apache.org/proper/commons-statistics/}commons-statistics}}
+ in targeted parts of function evaluations where accuracy is improved by 8-10 bits over standard
+ <<<double>>> arithmetic, or even to enable computations that have catastrophic error using
+ <<<double>>> arithmetic.
+
Field
The {{{../commons-numbers-field/index.html}commons-numbers-field}} module