You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2018/08/08 16:45:47 UTC

[2/6] [text] Fix [TEXT-130] JaroWinklerDistance: Wrong results due to precision of transpositions

Fix [TEXT-130] JaroWinklerDistance: Wrong results due to precision of transpositions


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/4d064dec
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/4d064dec
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/4d064dec

Branch: refs/heads/master
Commit: 4d064decbf7828918ca59b70d7fca19b7da955ec
Parents: 70150fb
Author: Jan Martin Keil <ja...@uni-jena.de>
Authored: Thu Aug 2 22:55:00 2018 +0200
Committer: Jan Martin Keil <ja...@uni-jena.de>
Committed: Thu Aug 2 22:55:00 2018 +0200

----------------------------------------------------------------------
 .../commons/text/similarity/JaroWinklerDistance.java    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d064dec/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java
index 0ffb1ad..915cd5c 100644
--- a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java
@@ -85,17 +85,17 @@ public class JaroWinklerDistance implements SimilarityScore<Double> {
         if (m == 0) {
             return 0D;
         }
-        final double j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3;
+        final double j = ((m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m)) / 3;
         final double jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j);
         return jw;
     }
 
     /**
-     * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.
+     * This method returns an array of the Jaro-Winkler matches, half transpositions, prefix, and max length.
      *
      * @param first the first string to be matched
      * @param second the second string to be matched
-     * @return mtp array containing: matches, transpositions, prefix, and max length
+     * @return mtp array containing: matches, half transpositions, prefix, and max length
      */
     protected static int[] matches(final CharSequence first, final CharSequence second) {
         CharSequence max, min;
@@ -136,10 +136,10 @@ public class JaroWinklerDistance implements SimilarityScore<Double> {
                 si++;
             }
         }
-        int transpositions = 0;
+        int halfTranspositions = 0;
         for (int mi = 0; mi < ms1.length; mi++) {
             if (ms1[mi] != ms2[mi]) {
-                transpositions++;
+                halfTranspositions++;
             }
         }
         int prefix = 0;
@@ -150,7 +150,7 @@ public class JaroWinklerDistance implements SimilarityScore<Double> {
                 break;
             }
         }
-        return new int[] {matches, transpositions / 2, prefix, max.length()};
+        return new int[] {matches, halfTranspositions, prefix, max.length()};
     }
 
 }