You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/04/15 11:53:06 UTC
[text] SANDBOX-488 Use an interface for EditDistance,
and leave separate classes as utility objects
Repository: commons-text
Updated Branches:
refs/heads/SANDBOX-488 [created] b0b9d358c
SANDBOX-488 Use an interface for EditDistance, and leave separate classes as utility objects
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/b0b9d358
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/b0b9d358
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/b0b9d358
Branch: refs/heads/SANDBOX-488
Commit: b0b9d358cc44e8e52d0675abf49eaefac003a112
Parents: 67ab6de
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Wed Apr 15 21:52:30 2015 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Wed Apr 15 21:52:30 2015 +1200
----------------------------------------------------------------------
.../commons/text/similarity/CosineDistance.java | 4 +-
.../text/similarity/CosineSimilarity.java | 2 +-
.../commons/text/similarity/EditDistance.java | 48 ++++++++
.../text/similarity/EditDistanceFrom.java | 112 +++++++++++++++++++
.../commons/text/similarity/FuzzyScore.java | 23 ++--
.../text/similarity/HammingDistance.java | 4 +-
.../text/similarity/JaroWrinklerDistance.java | 8 +-
.../text/similarity/LevenshteinDistance.java | 4 +-
.../commons/text/similarity/StringMetric.java | 47 --------
.../text/similarity/StringMetricFrom.java | 111 ------------------
.../commons/text/similarity/FuzzyScoreTest.java | 20 ++--
.../ParameterizedEditDistanceFromTest.java | 92 +++++++++++++++
.../ParameterizedStringMetricFromTest.java | 92 ---------------
.../text/similarity/StringMetricFromTest.java | 14 +--
14 files changed, 295 insertions(+), 286 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
index f9fcf39..98ef49e 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -28,9 +28,9 @@ import org.apache.commons.text.similarity.internal.Tokenizer;
* <p>It utilizes the CosineSimilarity to compute the distance. Character sequences
* are converted into vectors through a simple tokenizer that works with </p>
*
- * @since 0.1
+ * @since 1.0
*/
-public class CosineDistance implements StringMetric<Double> {
+public class CosineDistance implements EditDistance<Double> {
/**
* Tokenizer used to convert the character sequence into a vector.
*/
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
index 4b29a04..cf21186 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
@@ -29,7 +29,7 @@ import java.util.Set;
* http://en.wikipedia.org/wiki/Cosine_similarity.
* </p>
*
- * @since 0.1
+ * @since 1.0
*/
public class CosineSimilarity {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/EditDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistance.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
new file mode 100644
index 0000000..824522a
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>.
+ *
+ * <p>
+ * An edit distance measures the similarity between two character sequences. Closer strings
+ * have shorter distances, and vice-versa.
+ * </p>
+ *
+ * <p>
+ * This is a {@code BiFunction<CharSequence, CharSequence, R>}.
+ * The <code>apply</code> method
+ * accepts a pair of {@link CharSequence} parameters
+ * and returns an <code>R</code> type similarity score.
+ * </p>
+ *
+ * @param <R> The type of similarity score unit used by this EditDistance.
+ * @since 1.0
+ */
+public interface EditDistance<R> {
+
+ /**
+ * Compares two CharSequences.
+ *
+ * @param left the first CharSequence
+ * @param right the second CharSequence
+ * @return the similarity score between two CharSequences
+ */
+ R apply(CharSequence left, CharSequence right);
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
new file mode 100644
index 0000000..710eace
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * <p>
+ * This stores an {@link EditDistance} implementation and a {@link CharSequence} "left" string.
+ * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
+ * comparison function for the pair of strings.
+ * </p>
+ *
+ * <p>
+ * The following is an example which finds the most similar string:
+ * </p>
+ * <pre>
+ * EditDistance&lt;Integer&gt; editDistance = new LevenshteinDistance();
+ * String target = "Apache";
+ * EditDistanceFrom&lt;Integer&gt; editDistanceFrom =
+ * new EditDistanceFrom&lt;Integer&gt;(editDistance, target);
+ * String mostSimilar = null;
+ * Integer shortestDistance = null;
+ *
+ * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
+ * Integer distance = editDistanceFrom.apply(test);
+ * if (shortestDistance == null || distance &lt; shortestDistance) {
+ * shortestDistance = distance;
+ * mostSimilar = test;
+ * }
+ * }
+ *
+ * System.out.println("The string most similar to \"" + target + "\" "
+ * + "is \"" + mostSimilar + "\" because "
+ * + "its distance is only " + shortestDistance + ".");
+ * </pre>
+ *
+ * @param <R> This is the type of similarity score used by the EditDistance function.
+ * @since 1.0
+ */
+public class EditDistanceFrom<R> {
+
+ /**
+ * Edit distance.
+ */
+ private final EditDistance<R> editDistance;
+ /**
+ * Left parameter used in distance function.
+ */
+ private final CharSequence left;
+
+ /**
+ * <p>This accepts the edit distance implementation and the "left" string.</p>
+ *
+ * @param editDistance This may not be null.
+ * @param left This may be null here,
+ * but the EditDistance#apply(CharSequence left, CharSequence right)
+ * implementation may not accept nulls.
+ */
+ public EditDistanceFrom(final EditDistance<R> editDistance, final CharSequence left) {
+ if (editDistance == null) {
+ throw new IllegalArgumentException("The edit distance may not be null.");
+ }
+
+ this.editDistance = editDistance;
+ this.left = left;
+ }
+
+ /**
+ * <p>
+ * This compares "left" field against the "right" parameter
+ * using the "edit distance" implementation.
+ * </p>
+ *
+ * @param right the second CharSequence
+ * @return the similarity score between two CharSequences
+ */
+ public R apply(CharSequence right) {
+ return editDistance.apply(left, right);
+ }
+
+ /**
+ * Gets the left parameter.
+ *
+ * @return the left parameter
+ */
+ public CharSequence getLeft() {
+ return left;
+ }
+
+ /**
+ * Gets the edit distance.
+ *
+ * @return the edit distance
+ */
+ public EditDistance<R> getEditDistance() {
+ return editDistance;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
index 73b282a..32b557a 100644
--- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
+++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
@@ -30,8 +30,10 @@ import java.util.Locale;
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class FuzzyScore implements StringMetric<Integer> {
+public class FuzzyScore {
/**
* Locale used to change the case of text.
@@ -61,14 +63,14 @@ public class FuzzyScore implements StringMetric<Integer> {
* </p>
*
* <pre>
- * score.apply(null, null, null) = IllegalArgumentException
- * score.apply("", "", Locale.ENGLISH) = 0
- * score.apply("Workshop", "b", Locale.ENGLISH) = 0
- * score.apply("Room", "o", Locale.ENGLISH) = 1
- * score.apply("Workshop", "w", Locale.ENGLISH) = 1
- * score.apply("Workshop", "ws", Locale.ENGLISH) = 2
- * score.apply("Workshop", "wo", Locale.ENGLISH) = 4
- * score.apply("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+ * score.fuzzyScore(null, null, null) = IllegalArgumentException
+ * score.fuzzyScore("", "", Locale.ENGLISH) = 0
+ * score.fuzzyScore("Workshop", "b", Locale.ENGLISH) = 0
+ * score.fuzzyScore("Room", "o", Locale.ENGLISH) = 1
+ * score.fuzzyScore("Workshop", "w", Locale.ENGLISH) = 1
+ * score.fuzzyScore("Workshop", "ws", Locale.ENGLISH) = 2
+ * score.fuzzyScore("Workshop", "wo", Locale.ENGLISH) = 4
+ * score.fuzzyScore("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
* </pre>
*
* @param term a full term that should be matched against, must not be null
@@ -78,8 +80,7 @@ public class FuzzyScore implements StringMetric<Integer> {
* @throws IllegalArgumentException if either String input {@code null} or
* Locale input {@code null}
*/
- @Override
- public Integer apply(CharSequence term, CharSequence query) {
+ public Integer fuzzyScore(CharSequence term, CharSequence query) {
if (term == null || query == null) {
throw new IllegalArgumentException("Strings must not be null");
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
index 94d0aad..a62cfa5 100644
--- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
@@ -24,8 +24,10 @@ package org.apache.commons.text.similarity;
* For further explanation about the Hamming Distance, take a look at its
* Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
* </p>
+ *
+ * @since 1.0
*/
-public class HammingDistance implements StringMetric<Integer> {
+public class HammingDistance implements EditDistance<Integer> {
/**
* Find the Hamming Distance between two strings with the same
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
index b96b83b..df9d6b2 100644
--- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
@@ -34,8 +34,10 @@ package org.apache.commons.text.similarity;
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class JaroWrinklerDistance implements StringMetric<Double> {
+public class JaroWrinklerDistance implements EditDistance<Double> {
/**
* The default prefix length limit set to four.
@@ -83,8 +85,8 @@ public class JaroWrinklerDistance implements StringMetric<Double> {
final double jaro = score(left, right);
final int cl = commonPrefixLength(left, right);
- final double matchScore = Math.round((jaro + (defaultScalingFactor
- * cl * (1.0 - jaro))) * percentageRoundValue) / percentageRoundValue;
+ final double matchScore = Math.round((jaro + defaultScalingFactor
+ * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue;
return matchScore;
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
index f776cce..d94fa47 100644
--- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
@@ -30,8 +30,10 @@ import java.util.Arrays;
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class LevenshteinDistance implements StringMetric<Integer> {
+public class LevenshteinDistance implements EditDistance<Integer> {
/**
* Default instance.
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/StringMetric.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetric.java b/src/main/java/org/apache/commons/text/similarity/StringMetric.java
deleted file mode 100644
index 2d1adfa..0000000
--- a/src/main/java/org/apache/commons/text/similarity/StringMetric.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-/**
- * Interface for <a href='http://en.wikipedia.org/wiki/String_metric'>String Metrics</a>.
- *
- * <p>
- * A string metric measures the similarity between two character sequences. Depending on
- * the algorithm, higher values can mean closer strings, or more distant strings.
- * </p>
- *
- * <p>
- * This is a BiFunction<CharSequence, CharSequence, R>.
- * The <code>apply</code> method
- * accepts a pair of {@link CharSequence} parameters
- * and returns an <code>R</code> type similarity score.
- * </p>
- *
- * @param <R> The type of similarity score unit used by this StringMetric.
- */
-public interface StringMetric<R> {
-
- /**
- * Compares two CharSequences.
- *
- * @param left the first CharSequence
- * @param right the second CharSequence
- * @return the similarity score between two CharSequences
- */
- R apply(CharSequence left, CharSequence right);
-
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java b/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java
deleted file mode 100644
index 3b2a871..0000000
--- a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-/**
- * <p>
- * This stores a {@link StringMetric} implementation and a {@link CharSequence} "left" string.
- * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
- * comparison function for the pair of strings.
- * </p>
- *
- * <p>
- * The following is an example which finds the most similar string:
- * </p>
- * <pre>
- * StringMetric<Integer> metric = new LevenshteinDistance();
- * String target = "Apache";
- * StringMetricFrom<Integer> metricFrom =
- * new StringMetricFrom<Integer>(metric, target);
- * String mostSimilar = null;
- * Integer shortestDistance = null;
- *
- * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
- * Integer distance = metricFrom.apply(test);
- * if (shortestDistance == null || distance < shortestDistance) {
- * shortestDistance = distance;
- * mostSimilar = test;
- * }
- * }
- *
- * System.out.println("The string most similar to \"" + target + "\" "
- * + "is \"" + mostSimilar + "\" because "
- * + "its distance is only " + shortestDistance + ".");
- * </pre>
- *
- * @param <R> This is the type of similarity score used by the StringMetric function.
- */
-public class StringMetricFrom<R> {
-
- /**
- * String metric.
- */
- private final StringMetric<R> metric;
- /**
- * Left parameter used in distance function.
- */
- private final CharSequence left;
-
- /**
- * <p>This accepts the metric implementation and the "left" string.</p>
- *
- * @param metric This may not be null.
- * @param left This may be null here,
- * but the StringMetric#compare(CharSequence left, CharSequence right)
- * implementation may not accept nulls.
- */
- public StringMetricFrom(final StringMetric<R> metric, final CharSequence left) {
- if (metric == null) {
- throw new IllegalArgumentException("The metric may not be null.");
- }
-
- this.metric = metric;
- this.left = left;
- }
-
- /**
- * <p>
- * This compares "left" field against the "right" parameter
- * using the "metric" implementation.
- * </p>
- *
- * @param right the second CharSequence
- * @return the similarity score between two CharSequences
- */
- public R apply(CharSequence right) {
- return metric.apply(left, right);
- }
-
- /**
- * Gets the left parameter.
- *
- * @return the left parameter
- */
- public CharSequence getLeft() {
- return left;
- }
-
- /**
- * Gets the right parameter.
- *
- * @return the right parameter
- */
- public StringMetric<R> getMetric() {
- return metric;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
index 44c2eeb..60bc802 100644
--- a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
@@ -31,29 +31,29 @@ public class FuzzyScoreTest {
@Test
public void testGetFuzzyScore() throws Exception {
- assertEquals(0, (int) ENGLISH_SCORE.apply("", ""));
- assertEquals(0, (int) ENGLISH_SCORE.apply("Workshop", "b"));
- assertEquals(1, (int) ENGLISH_SCORE.apply("Room", "o"));
- assertEquals(1, (int) ENGLISH_SCORE.apply("Workshop", "w"));
- assertEquals(2, (int) ENGLISH_SCORE.apply("Workshop", "ws"));
- assertEquals(4, (int) ENGLISH_SCORE.apply("Workshop", "wo"));
- assertEquals(3, (int) ENGLISH_SCORE.apply(
+ assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("", ""));
+ assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "b"));
+ assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Room", "o"));
+ assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "w"));
+ assertEquals(2, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "ws"));
+ assertEquals(4, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "wo"));
+ assertEquals(3, (int) ENGLISH_SCORE.fuzzyScore(
"Apache Software Foundation", "asf"));
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_StringNullLocale() throws Exception {
- ENGLISH_SCORE.apply("not null", null);
+ ENGLISH_SCORE.fuzzyScore("not null", null);
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_NullStringLocale() throws Exception {
- ENGLISH_SCORE.apply(null, "not null");
+ ENGLISH_SCORE.fuzzyScore(null, "not null");
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_NullNullLocale() throws Exception {
- ENGLISH_SCORE.apply(null, null);
+ ENGLISH_SCORE.fuzzyScore(null, null);
}
@Test(expected = IllegalArgumentException.class)
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
new file mode 100644
index 0000000..5a4d6d1
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}.
+ *
+ * @param <R> The {@link EditDistance} return type.
+ */
+@RunWith(Parameterized.class)
+public class ParameterizedEditDistanceFromTest<R> {
+
+ private final EditDistance<R> editDistance;
+ private final CharSequence left;
+ private final CharSequence right;
+ private final R distance;
+
+ public ParameterizedEditDistanceFromTest(
+ final EditDistance<R> editDistance,
+ final CharSequence left, final CharSequence right,
+ final R distance) {
+
+ this.editDistance = editDistance;
+ this.left = left;
+ this.right = right;
+ this.distance = distance;
+ }
+
+ @Parameters
+ public static Iterable<Object[]> parameters() {
+ return Arrays.asList( new Object[][] {
+
+ /* TODO: When SANDBOX-491 is ready, add a few FuzzyScore tests. */
+
+ { new HammingDistance(), "Sam I am.", "Ham I am.", 1 },
+ { new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2 },
+ { new HammingDistance(), "Hamming", "Hamming", 0 },
+
+ { new JaroWrinklerDistance(), "elephant", "hippo", 0.44 },
+ { new JaroWrinklerDistance(), "hippo", "elephant", 0.44 },
+ { new JaroWrinklerDistance(), "hippo", "zzzzzzzz", 0.0 },
+
+ /* TODO: When SANDBOX-491 is ready, add a few limited/threshold tests. */
+ { new LevenshteinDistance(), "Apache", "a patchy", 4 },
+ { new LevenshteinDistance(), "go", "no go", 3 },
+ { new LevenshteinDistance(), "go", "go", 0 },
+
+ {
+ new EditDistance<Boolean>() {
+ public Boolean apply(CharSequence left, CharSequence right) {
+ return left == right || (left != null && left.equals(right));
+ }
+ },
+ "Bob's your uncle.",
+ "Every good boy does fine.",
+ false
+ }
+
+ } );
+ }
+
+ @Test
+ public void test() {
+ EditDistanceFrom<R> editDistanceFrom = new EditDistanceFrom<R>(editDistance, left);
+ assertThat(editDistanceFrom.apply(right), equalTo(distance));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java
deleted file mode 100644
index 36c03bb..0000000
--- a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertThat;
-
-import java.util.Arrays;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-/**
- * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}.
- *
- * @param <R> The {@link StringMetric} return type.
- */
-@RunWith(Parameterized.class)
-public class ParameterizedStringMetricFromTest<R> {
-
- private final StringMetric<R> metric;
- private final CharSequence left;
- private final CharSequence right;
- private final R distance;
-
- public ParameterizedStringMetricFromTest(
- final StringMetric<R> metric,
- final CharSequence left, final CharSequence right,
- final R distance) {
-
- this.metric = metric;
- this.left = left;
- this.right = right;
- this.distance = distance;
- }
-
- @Parameters
- public static Iterable<Object[]> parameters() {
- return Arrays.asList( new Object[][] {
-
- /* TODO: When SANDBOX-491 is ready, add a few FuzzyScore tests. */
-
- { new HammingDistance(), "Sam I am.", "Ham I am.", 1 },
- { new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2 },
- { new HammingDistance(), "Hamming", "Hamming", 0 },
-
- { new JaroWrinklerDistance(), "elephant", "hippo", 0.44 },
- { new JaroWrinklerDistance(), "hippo", "elephant", 0.44 },
- { new JaroWrinklerDistance(), "hippo", "zzzzzzzz", 0.0 },
-
- /* TODO: When SANDBOX-491 is ready, add a few limited/threshold tests. */
- { new LevenshteinDistance(), "Apache", "a patchy", 4 },
- { new LevenshteinDistance(), "go", "no go", 3 },
- { new LevenshteinDistance(), "go", "go", 0 },
-
- {
- new StringMetric<Boolean>() {
- public Boolean apply(CharSequence left, CharSequence right) {
- return left == right || (left != null && left.equals(right));
- }
- },
- "Bob's your uncle.",
- "Every good boy does fine.",
- false
- }
-
- } );
- }
-
- @Test
- public void test() {
- StringMetricFrom<R> metricFrom = new StringMetricFrom<R>(metric, left);
- assertThat(metricFrom.apply(right), equalTo(distance));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
index e268366..de59452 100644
--- a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
@@ -22,17 +22,17 @@ import static org.junit.Assert.assertThat;
import org.junit.Test;
/**
- * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}.
+ * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}.
*/
public class StringMetricFromTest {
@Test
public void testEquivalence() {
- StringMetric<Integer> metric = new LevenshteinDistance();
+ EditDistance<Integer> metric = new LevenshteinDistance();
String left = "Apache";
String right = "a patchy";
Integer distance = 4;
- StringMetricFrom<Integer> metricFrom = new StringMetricFrom<Integer>(metric, left);
+ EditDistanceFrom<Integer> metricFrom = new EditDistanceFrom<Integer>(metric, left);
assertThat(metricFrom.apply(right), equalTo(distance));
assertThat(metricFrom.apply(right), equalTo(metric.apply(left, right)));
@@ -40,10 +40,10 @@ public class StringMetricFromTest {
@Test
public void testJavadocExample() {
- StringMetric<Integer> metric = new LevenshteinDistance();
+ EditDistance<Integer> metric = new LevenshteinDistance();
String target = "Apache";
- StringMetricFrom<Integer> metricFrom =
- new StringMetricFrom<Integer>(metric, target);
+ EditDistanceFrom<Integer> metricFrom =
+ new EditDistanceFrom<Integer>(metric, target);
String mostSimilar = null;
Integer shortestDistance = null;
@@ -65,7 +65,7 @@ public class StringMetricFromTest {
@Test(expected = IllegalArgumentException.class)
public void testMissingMetric() {
- new StringMetricFrom<Number>(null, "no go");
+ new EditDistanceFrom<Number>(null, "no go");
}
}