You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by Benedikt Ritter <br...@apache.org> on 2014/12/14 20:51:04 UTC

Re: [text] SANDBOX-483 Add changes and fix old Javadocs from [lang] that remained after the code porting

2014-12-13 4:21 GMT+01:00 <ki...@apache.org>:
>
> Repository: commons-text
> Updated Branches:
>   refs/heads/master 87b789fbe -> 7570eb016
>
>
> SANDBOX-483 Add changes and fix old Javadocs from [lang] that remained
> after the code porting
>
>
> Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
> Commit:
> http://git-wip-us.apache.org/repos/asf/commons-text/commit/7570eb01
> Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/7570eb01
> Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/7570eb01
>
> Branch: refs/heads/master
> Commit: 7570eb0163cab027b444ca55e6d4c9768fcd0d34
> Parents: 87b789f
> Author: Bruno P. Kinoshita <ki...@apache.org>
> Authored: Sat Dec 13 01:21:11 2014 -0200
> Committer: Bruno P. Kinoshita <ki...@apache.org>
> Committed: Sat Dec 13 01:21:11 2014 -0200
>
> ----------------------------------------------------------------------
>  src/changes/changes.xml                         |   1 +
>  .../commons/text/similarity/FuzzyDistance.java  |  20 ++--
>  .../text/similarity/JaroWrinklerDistance.java   | 103 +++++++++----------
>  .../text/similarity/LevenshteinDistance.java    |  44 ++++----
>  4 files changed, 83 insertions(+), 85 deletions(-)
> ----------------------------------------------------------------------
>
>
>
> http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/changes/changes.xml
> ----------------------------------------------------------------------
> diff --git a/src/changes/changes.xml b/src/changes/changes.xml
> index d8c3fdf..f890519 100644
> --- a/src/changes/changes.xml
> +++ b/src/changes/changes.xml
> @@ -23,6 +23,7 @@
>
>    <release version="1.0" date="tba" description="tba">
>      <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming
> distance</action>
> +    <action issue="SANDBOX-483" type="add" dev="kinow"
> due-to="britter">Incorporate String algorithms from Commons Lang</action>
>

Thanks for the kudos, but I didn't do much to resolve this issue :-)


>    </release>
>
>    </body>
>
>
> http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
> ----------------------------------------------------------------------
> diff --git
> a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
> b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
> index 8e9228a..f4299ea 100644
> --- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
> +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
> @@ -26,6 +26,10 @@ import java.util.Locale;
>   * indicates a higher similarity.
>   * </p>
>   *
> + * <p>
> + * This code has been adapted from Apache Commons Lang 3.3.
> + * </p>
> + *
>   * @since 1.0
>   */
>  public class FuzzyDistance implements StringMetric<Integer> {
> @@ -54,14 +58,14 @@ public class FuzzyDistance implements
> StringMetric<Integer> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.getFuzzyDistance(null, null, null)
>                 = IllegalArgumentException
> -     * StringUtils.getFuzzyDistance("", "", Locale.ENGLISH)
>                 = 0
> -     * StringUtils.getFuzzyDistance("Workshop", "b", Locale.ENGLISH)
>                = 0
> -     * StringUtils.getFuzzyDistance("Room", "o", Locale.ENGLISH)
>                = 1
> -     * StringUtils.getFuzzyDistance("Workshop", "w", Locale.ENGLISH)
>                = 1
> -     * StringUtils.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH)
>                 = 2
> -     * StringUtils.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH)
>                 = 4
> -     * StringUtils.getFuzzyDistance("Apache Software Foundation", "asf",
> Locale.ENGLISH) = 3
> +     * distance.getFuzzyDistance(null, null, null)
>             = IllegalArgumentException
> +     * distance.getFuzzyDistance("", "", Locale.ENGLISH)
>             = 0
> +     * distance.getFuzzyDistance("Workshop", "b", Locale.ENGLISH)
>              = 0
> +     * distance.getFuzzyDistance("Room", "o", Locale.ENGLISH)
>              = 1
> +     * distance.getFuzzyDistance("Workshop", "w", Locale.ENGLISH)
>              = 1
> +     * distance.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH)
>             = 2
> +     * distance.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH)
>             = 4
> +     * distance.getFuzzyDistance("Apache Software Foundation", "asf",
> Locale.ENGLISH) = 3
>       * </pre>
>       *
>       * @param term a full term that should be matched against, must not
> be null
>
>
> http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
> ----------------------------------------------------------------------
> diff --git
> a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
> b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
> index 3a94969..67aa2b8 100644
> ---
> a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
> +++
> b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
> @@ -49,20 +49,20 @@ public class JaroWrinklerDistance implements
> StringMetric<Double> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.getJaroWinklerDistance(null, null)          =
> IllegalArgumentException
> -     * StringUtils.getJaroWinklerDistance("","")               = 0.0
> -     * StringUtils.getJaroWinklerDistance("","a")              = 0.0
> -     * StringUtils.getJaroWinklerDistance("aaapppp", "")       = 0.0
> -     * StringUtils.getJaroWinklerDistance("frog", "fog")       = 0.93
> -     * StringUtils.getJaroWinklerDistance("fly", "ant")        = 0.0
> -     * StringUtils.getJaroWinklerDistance("elephant", "hippo") = 0.44
> -     * StringUtils.getJaroWinklerDistance("hippo", "elephant") = 0.44
> -     * StringUtils.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0
> -     * StringUtils.getJaroWinklerDistance("hello", "hallo")    = 0.88
> -     * StringUtils.getJaroWinklerDistance("ABC Corporation", "ABC Corp")
> = 0.91
> -     * StringUtils.getJaroWinklerDistance("D N H Enterprises Inc", "D
> &amp; H Enterprises, Inc.") = 0.93
> -     * StringUtils.getJaroWinklerDistance("My Gym Children's Fitness
> Center", "My Gym. Childrens Fitness") = 0.94
> -     * StringUtils.getJaroWinklerDistance("PENNSYLVANIA",
> "PENNCISYLVNIA")    = 0.9
> +     * distance.getJaroWinklerDistance(null, null)          =
> IllegalArgumentException
> +     * distance.getJaroWinklerDistance("","")               = 0.0
> +     * distance.getJaroWinklerDistance("","a")              = 0.0
> +     * distance.getJaroWinklerDistance("aaapppp", "")       = 0.0
> +     * distance.getJaroWinklerDistance("frog", "fog")       = 0.93
> +     * distance.getJaroWinklerDistance("fly", "ant")        = 0.0
> +     * distance.getJaroWinklerDistance("elephant", "hippo") = 0.44
> +     * distance.getJaroWinklerDistance("hippo", "elephant") = 0.44
> +     * distance.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0
> +     * distance.getJaroWinklerDistance("hello", "hallo")    = 0.88
> +     * distance.getJaroWinklerDistance("ABC Corporation", "ABC Corp") =
> 0.91
> +     * distance.getJaroWinklerDistance("D N H Enterprises Inc", "D &amp;
> H Enterprises, Inc.") = 0.93
> +     * distance.getJaroWinklerDistance("My Gym Children's Fitness
> Center", "My Gym. Childrens Fitness") = 0.94
> +     * distance.getJaroWinklerDistance("PENNSYLVANIA", "PENNCISYLVNIA")
>   = 0.9
>       * </pre>
>       *
>       * @param left the first String, must not be null
> @@ -86,9 +86,6 @@ public class JaroWrinklerDistance implements
> StringMetric<Double> {
>          return matchScore;
>      }
>
> -    // TODO: we can move these methods to a Util class, keep them here,
> -    // create a common abstract class, shade lang-3.3...
> -
>      /**
>       * Calculates the number of characters from the beginning of the
> strings
>       * that match exactly one-to-one, up to a maximum of four (4)
> characters.
> @@ -118,30 +115,29 @@ public class JaroWrinklerDistance implements
> StringMetric<Double> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.getCommonPrefix(null) = ""
> -     * StringUtils.getCommonPrefix(new String[] {}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"abc"}) = "abc"
> -     * StringUtils.getCommonPrefix(new String[] {null, null}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"", ""}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"", null}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"abc", null, null}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {null, null, "abc"}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"", "abc"}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"abc", ""}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"abc", "abc"}) = "abc"
> -     * StringUtils.getCommonPrefix(new String[] {"abc", "a"}) = "a"
> -     * StringUtils.getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"
> -     * StringUtils.getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"
> -     * StringUtils.getCommonPrefix(new String[] {"abcde", "xyz"}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"xyz", "abcde"}) = ""
> -     * StringUtils.getCommonPrefix(new String[] {"i am a machine", "i am
> a robot"}) = "i am a "
> +     * getCommonPrefix(null) = ""
> +     * getCommonPrefix(new String[] {}) = ""
> +     * getCommonPrefix(new String[] {"abc"}) = "abc"
> +     * getCommonPrefix(new String[] {null, null}) = ""
> +     * getCommonPrefix(new String[] {"", ""}) = ""
> +     * getCommonPrefix(new String[] {"", null}) = ""
> +     * getCommonPrefix(new String[] {"abc", null, null}) = ""
> +     * getCommonPrefix(new String[] {null, null, "abc"}) = ""
> +     * getCommonPrefix(new String[] {"", "abc"}) = ""
> +     * getCommonPrefix(new String[] {"abc", ""}) = ""
> +     * getCommonPrefix(new String[] {"abc", "abc"}) = "abc"
> +     * getCommonPrefix(new String[] {"abc", "a"}) = "a"
> +     * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"
> +     * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"
> +     * getCommonPrefix(new String[] {"abcde", "xyz"}) = ""
> +     * getCommonPrefix(new String[] {"xyz", "abcde"}) = ""
> +     * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) =
> "i am a "
>       * </pre>
>       *
>       * @param strs array of String objects, entries may be null
>       * @return the initial sequence of characters that are common to all
> Strings
>       *         in the array; empty String if the array is null, the
> elements are
>       *         all null or if there is no common prefix.
> -     * @since 2.4
>       */
>      public static String getCommonPrefix(final String... strs) {
>          if (strs == null || strs.length == 0) {
> @@ -249,31 +245,28 @@ public class JaroWrinklerDistance implements
> StringMetric<Double> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.indexOfDifference(null) = -1
> -     * StringUtils.indexOfDifference(new String[] {}) = -1
> -     * StringUtils.indexOfDifference(new String[] {"abc"}) = -1
> -     * StringUtils.indexOfDifference(new String[] {null, null}) = -1
> -     * StringUtils.indexOfDifference(new String[] {"", ""}) = -1
> -     * StringUtils.indexOfDifference(new String[] {"", null}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"abc", null, null}) = 0
> -     * StringUtils.indexOfDifference(new String[] {null, null, "abc"}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"", "abc"}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"abc", ""}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"abc", "abc"}) = -1
> -     * StringUtils.indexOfDifference(new String[] {"abc", "a"}) = 1
> -     * StringUtils.indexOfDifference(new String[] {"ab", "abxyz"}) = 2
> -     * StringUtils.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2
> -     * StringUtils.indexOfDifference(new String[] {"abcde", "xyz"}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"xyz", "abcde"}) = 0
> -     * StringUtils.indexOfDifference(new String[] {"i am a machine", "i
> am a robot"}) = 7
> +     * distance.indexOfDifference(null) = -1
> +     * distance.indexOfDifference(new String[] {}) = -1
> +     * distance.indexOfDifference(new String[] {"abc"}) = -1
> +     * distance.indexOfDifference(new String[] {null, null}) = -1
> +     * distance.indexOfDifference(new String[] {"", ""}) = -1
> +     * distance.indexOfDifference(new String[] {"", null}) = 0
> +     * distance.indexOfDifference(new String[] {"abc", null, null}) = 0
> +     * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0
> +     * distance.indexOfDifference(new String[] {"", "abc"}) = 0
> +     * distance.indexOfDifference(new String[] {"abc", ""}) = 0
> +     * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1
> +     * distance.indexOfDifference(new String[] {"abc", "a"}) = 1
> +     * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2
> +     * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2
> +     * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0
> +     * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0
> +     * distance.indexOfDifference(new String[] {"i am a machine", "i am a
> robot"}) = 7
>       * </pre>
>       *
>       * @param css array of CharSequences, entries may be null
>       * @return the index where the strings begin to differ; -1 if they
> are all
>       *         equal
> -     * @since 2.4
> -     * @since 3.0 Changed signature from indexOfDifference(String...) to
> -     *        indexOfDifference(CharSequence...)
>       */
>      protected static int indexOfDifference(final CharSequence... css) {
>          if (css == null || css.length <= 1) {
>
>
> http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
> ----------------------------------------------------------------------
> diff --git
> a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
> b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
> index 1793f1e..cca3dc1 100644
> ---
> a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
> +++
> b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
> @@ -59,17 +59,17 @@ public class LevenshteinDistance implements
> StringMetric<Integer> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.getLevenshteinDistance(null, *)             =
> IllegalArgumentException
> -     * StringUtils.getLevenshteinDistance(*, null)             =
> IllegalArgumentException
> -     * StringUtils.getLevenshteinDistance("","")               = 0
> -     * StringUtils.getLevenshteinDistance("","a")              = 1
> -     * StringUtils.getLevenshteinDistance("aaapppp", "")       = 7
> -     * StringUtils.getLevenshteinDistance("frog", "fog")       = 1
> -     * StringUtils.getLevenshteinDistance("fly", "ant")        = 3
> -     * StringUtils.getLevenshteinDistance("elephant", "hippo") = 7
> -     * StringUtils.getLevenshteinDistance("hippo", "elephant") = 7
> -     * StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8
> -     * StringUtils.getLevenshteinDistance("hello", "hallo")    = 1
> +     * distance.getLevenshteinDistance(null, *)             =
> IllegalArgumentException
> +     * distance.getLevenshteinDistance(*, null)             =
> IllegalArgumentException
> +     * distance.getLevenshteinDistance("","")               = 0
> +     * distance.getLevenshteinDistance("","a")              = 1
> +     * distance.getLevenshteinDistance("aaapppp", "")       = 7
> +     * distance.getLevenshteinDistance("frog", "fog")       = 1
> +     * distance.getLevenshteinDistance("fly", "ant")        = 3
> +     * distance.getLevenshteinDistance("elephant", "hippo") = 7
> +     * distance.getLevenshteinDistance("hippo", "elephant") = 7
> +     * distance.getLevenshteinDistance("hippo", "zzzzzzzz") = 8
> +     * distance.getLevenshteinDistance("hello", "hallo")    = 1
>       * </pre>
>       *
>       * @param left the first string, must not be null
> @@ -103,17 +103,17 @@ public class LevenshteinDistance implements
> StringMetric<Integer> {
>       * </p>
>       *
>       * <pre>
> -     * StringUtils.getLevenshteinDistance(null, *, *)             =
> IllegalArgumentException
> -     * StringUtils.getLevenshteinDistance(*, null, *)             =
> IllegalArgumentException
> -     * StringUtils.getLevenshteinDistance(*, *, -1)               =
> IllegalArgumentException
> -     * StringUtils.getLevenshteinDistance("","", 0)               = 0
> -     * StringUtils.getLevenshteinDistance("aaapppp", "", 8)       = 7
> -     * StringUtils.getLevenshteinDistance("aaapppp", "", 7)       = 7
> -     * StringUtils.getLevenshteinDistance("aaapppp", "", 6))      = -1
> -     * StringUtils.getLevenshteinDistance("elephant", "hippo", 7) = 7
> -     * StringUtils.getLevenshteinDistance("elephant", "hippo", 6) = -1
> -     * StringUtils.getLevenshteinDistance("hippo", "elephant", 7) = 7
> -     * StringUtils.getLevenshteinDistance("hippo", "elephant", 6) = -1
> +     * distance.getLevenshteinDistance(null, *, *)             =
> IllegalArgumentException
> +     * distance.getLevenshteinDistance(*, null, *)             =
> IllegalArgumentException
> +     * distance.getLevenshteinDistance(*, *, -1)               =
> IllegalArgumentException
> +     * distance.getLevenshteinDistance("","", 0)               = 0
> +     * distance.getLevenshteinDistance("aaapppp", "", 8)       = 7
> +     * distance.getLevenshteinDistance("aaapppp", "", 7)       = 7
> +     * distance.getLevenshteinDistance("aaapppp", "", 6))      = -1
> +     * distance.getLevenshteinDistance("elephant", "hippo", 7) = 7
> +     * distance.getLevenshteinDistance("elephant", "hippo", 6) = -1
> +     * distance.getLevenshteinDistance("hippo", "elephant", 7) = 7
> +     * distance.getLevenshteinDistance("hippo", "elephant", 6) = -1
>       * </pre>
>       *
>       * @param left the first string, must not be null
>
>

-- 
http://people.apache.org/~britter/
http://www.systemoutprintln.de/
http://twitter.com/BenediktRitter
http://github.com/britter