You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2020/01/06 02:00:09 UTC
[commons-numbers] branch master updated: Simplify high precision summation using Dekker's add2 sum.

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-numbers.git


The following commit(s) were added to refs/heads/master by this push:
     new 788fdd4  Simplify high precision summation using Dekker's add2 sum.
788fdd4 is described below

commit 788fdd4a983392fe68377ff1f3f0ca783cd67049
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Jan 6 02:00:05 2020 +0000

    Simplify high precision summation using Dekker's add2 sum.
---
 .../apache/commons/numbers/complex/Complex.java    | 71 +++++++---------------
 1 file changed, 22 insertions(+), 49 deletions(-)

diff --git a/commons-numbers-complex/src/main/java/org/apache/commons/numbers/complex/Complex.java b/commons-numbers-complex/src/main/java/org/apache/commons/numbers/complex/Complex.java
index 4b4d98b..921e6c9 100644
--- a/commons-numbers-complex/src/main/java/org/apache/commons/numbers/complex/Complex.java
+++ b/commons-numbers-complex/src/main/java/org/apache/commons/numbers/complex/Complex.java
@@ -2259,7 +2259,7 @@ public final class Complex implements Serializable  {
             // - The answer will not be NaN as the terms are not NaN components
             // - The order is known to be 1 > |x| >= |y|
             // The squares are computed using a split multiply algorithm and
-            // the summation using a fast-expansion-sum algorithm.
+            // the summation using a double-length summation algorithm.
 
             // Split x and y as one 26 bits number and one 27 bits number
             final double xHigh        = splitHigh(x);
@@ -2273,7 +2273,7 @@ public final class Complex implements Serializable  {
             final double y2High       = y * y;
             final double y2Low        = squareRoundOff(yLow, yHigh, y2High);
 
-            return fastExpansionSumx2y2m1(x2High, x2Low, y2High, y2Low);
+            return sumx2y2m1(x2High, x2Low, y2High, y2Low);
         }
         return (x - 1) * (x + 1) + yy;
     }
@@ -2290,14 +2290,14 @@ public final class Complex implements Serializable  {
      * </pre>
      *
      * <p>The multiplicand must be odd allowing a p-bit value to be split into
-     * (p-s)-bit value {@code a_hi} and a nonoverlapping (s-1)-bit value {@code a_lo}.
+     * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}.
      * Combined they have (p􏰔-1) bits of significand but the sign bit of {@code a_lo}
      * contains a bit of information.
      *
      * @param a Value.
      * @return the high part of the value.
-     * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
-     * Shewchuk (1997) Theorum 17</a>
+     * @see <a href="https://doi.org/10.1007/BF01397083">
+     * Dekker (1971) A floating-point technique for extending the available precision</a>
      */
     private static double splitHigh(double a) {
         final double c = MULTIPLIER * a;
@@ -2312,64 +2312,37 @@ public final class Complex implements Serializable  {
      * @param high High part of number.
      * @param square Square of the number.
      * @return <code>low * low - ((product - high * high) - 2 * low * high)</code>
+     * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
+     * Shewchuk (1997) Theorum 18</a>
      */
     private static double squareRoundOff(double low, double high, double square) {
         return low * low - ((square - high * high) - 2 * low * high);
     }
 
     /**
-     * Compute the round-off from the sum of two numbers {@code a} and {@code b} using
-     * Knuth's two-sum algorithm. The values are not required to be ordered by magnitude.
-     *
-     * @param a First part of sum.
-     * @param b Second part of sum.
-     * @param sum Sum.
-     * @return <code>(b - (sum - (sum - b))) + (a - (sum - b))</code>
-     */
-    private static double sumRoundOff(double a, double b, double sum) {
-        final double aPrime = sum - b;
-        // sum - aPrime == bPrime.
-        // a - aPrime == a round-off
-        // b - bPrime == b round-off
-        return (a - aPrime) + (b - (sum - aPrime));
-    }
-
-    /**
      * Sum x^2 + y^2 - 1.
      *
-     * <p>Uses Shewchuk's fast expansion sum under the assumption that {@code 1 > |x| >= |y|}
-     * thus the high and low products from {@code x^2} and {@code y^2} can be ordered into
-     * a single sequence g, in order of nondecreasing magnitude with no sorting.
+     * <p>Implement Dekker's add2 sum under the assumption that {@code |x| >= |y|}.
      *
      * @param x2High High part of x^2.
      * @param x2Low Low part of x^2.
      * @param y2High High part of y^2.
      * @param y2Low Low part of y^2.
      * @return x^2 + y^2 - 1
-     * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
-     * Shewchuk (1997) Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates: Theorum 13</a>
-     */
-    private static double fastExpansionSumx2y2m1(double x2High, double x2Low, double y2High, double y2Low) {
-        // Create a single sequence g that is in order of nondecreasing magnitude.
-        // Since |x| >= |y| then x2High >= y2High and the two smallest components are the
-        // round-off from x^2 and y^2. The final term is -1 which will never have a bit overlap
-        // with the products x^2 or y^2 as 1 has no bits.
-        // Thus this implements fast-two-sum: e + f as {x2Low, x2High} + {y2Low, y2High, -1}.
-        // In Shewchuk the two smallest components are added using a
-        // fast-two-sum which requires ordering. Here we use a two-sum to avoid order comparators.
-        // The remaining components are added with a two-sum that does not require ordering.
-        // Q_i is the running total of summing sequence g, {y2Low, x2Low, y2High, x2High, -1}.
-        // h_i is the output sequence; this is guaranteed to produce a strongly non-overlapping
-        // output if its inputs are strongly non-overlapping. The sum of h is the sum of e + f.
-        final double q1 = x2Low + y2Low;
-        final double h1 = sumRoundOff(x2Low, y2Low, q1);
-        final double q2 = q1 + y2High;
-        final double h2 = sumRoundOff(q1, y2High, q2);
-        final double q3 = q2 + x2High;
-        final double h3 = sumRoundOff(q2, x2High, q3);
-        final double h5 = q3 - 1;
-        final double h4 = sumRoundOff(q3, -1, h5);
-        return h1 + h2 + h3 + h4 + h5;
+     */
+    private static double sumx2y2m1(double x2High, double x2Low, double y2High, double y2Low) {
+        // Dekker's add2 algorithm to compute the double length scalar product x^2 + y^2.
+        // See Dekker (1971) Numerische Mathematik 18, p.240.
+        double r = x2High + y2High;
+        // Assume |x| > |y|
+        double s = x2High - r + y2High + y2Low + x2Low;
+        final double z = r + s;
+        final double zz = r - z + s;
+        // Repeat with (z, zz) add (-1, 0) to create the upper part of the
+        // double length scalar product x^2 + y^2 - 1.
+        r = z - 1;
+        s = z - r - 1 + zz;
+        return r + s;
     }
 
     /**