You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/14 16:57:02 UTC
[text] SANDBOX-488 rename FuzzyDistance to FuzzyScore
Repository: commons-text
Updated Branches:
refs/heads/master 9dd58bce9 -> 1e7d2aa50
SANDBOX-488 rename FuzzyDistance to FuzzyScore
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/1e7d2aa5
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/1e7d2aa5
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/1e7d2aa5
Branch: refs/heads/master
Commit: 1e7d2aa5057ad5e067ec9cd762ab8772546bc777
Parents: 9dd58bc
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sat Feb 14 13:56:55 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sat Feb 14 13:56:55 2015 -0200
----------------------------------------------------------------------
.../commons/text/similarity/FuzzyDistance.java | 133 -------------------
.../commons/text/similarity/FuzzyScore.java | 133 +++++++++++++++++++
.../text/similarity/FuzzyDistanceTest.java | 75 -----------
.../commons/text/similarity/FuzzyScoreTest.java | 75 +++++++++++
4 files changed, 208 insertions(+), 208 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
deleted file mode 100644
index 4d175a0..0000000
--- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import java.util.Locale;
-
-/**
- * A matching algorithm that is similar to the searching algorithms implemented in editors such
- * as Sublime Text, TextMate, Atom and others.
- *
- * <p>
- * One point is given for every matched character. Subsequent matches yield two bonus points. A higher score
- * indicates a higher similarity.
- * </p>
- *
- * <p>
- * This code has been adapted from Apache Commons Lang 3.3.
- * </p>
- */
-public class FuzzyDistance implements StringMetric<Integer> {
-
- /**
- * <p>
- * Find the Fuzzy Distance which indicates the similarity score between two
- * Strings. This method uses the default locale.
- * </p>
- *
- * @param term a full term that should be matched against, must not be null
- * @param query the query that will be matched against a term, must not be
- * null
- * @return result score
- * @throws IllegalArgumentException if either String input {@code null}
- */
- @Override
- public Integer compare(CharSequence term, CharSequence query) {
- return compare(term, query, Locale.getDefault());
- }
-
- /**
- * <p>
- * Find the Fuzzy Distance which indicates the similarity score between two
- * Strings.
- * </p>
- *
- * <pre>
- * distance.compare(null, null, null) = IllegalArgumentException
- * distance.compare("", "", Locale.ENGLISH) = 0
- * distance.compare("Workshop", "b", Locale.ENGLISH) = 0
- * distance.compare("Room", "o", Locale.ENGLISH) = 1
- * distance.compare("Workshop", "w", Locale.ENGLISH) = 1
- * distance.compare("Workshop", "ws", Locale.ENGLISH) = 2
- * distance.compare("Workshop", "wo", Locale.ENGLISH) = 4
- * distance.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
- * </pre>
- *
- * @param term a full term that should be matched against, must not be null
- * @param query the query that will be matched against a term, must not be
- * null
- * @param locale This string matching logic is case insensitive. A locale is
- * necessary to normalize both Strings to lower case.
- * @return result score
- * @throws IllegalArgumentException if either String input {@code null} or
- * Locale input {@code null}
- */
- public Integer compare(CharSequence term, CharSequence query, Locale locale) {
- if (term == null || query == null) {
- throw new IllegalArgumentException("Strings must not be null");
- } else if (locale == null) {
- throw new IllegalArgumentException("Locale must not be null");
- }
-
- // fuzzy logic is case insensitive. We normalize the Strings to lower
- // case right from the start. Turning characters to lower case
- // via Character.toLowerCase(char) is unfortunately insufficient
- // as it does not accept a locale.
- final String termLowerCase = term.toString().toLowerCase(locale);
- final String queryLowerCase = query.toString().toLowerCase(locale);
-
- // the resulting score
- int score = 0;
-
- // the position in the term which will be scanned next for potential
- // query character matches
- int termIndex = 0;
-
- // index of the previously matched character in the term
- int previousMatchingCharacterIndex = Integer.MIN_VALUE;
-
- for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) {
- final char queryChar = queryLowerCase.charAt(queryIndex);
-
- boolean termCharacterMatchFound = false;
- for (; termIndex < termLowerCase.length()
- && !termCharacterMatchFound; termIndex++) {
- final char termChar = termLowerCase.charAt(termIndex);
-
- if (queryChar == termChar) {
- // simple character matches result in one point
- score++;
-
- // subsequent character matches further improve
- // the score.
- if (previousMatchingCharacterIndex + 1 == termIndex) {
- score += 2;
- }
-
- previousMatchingCharacterIndex = termIndex;
-
- // we can leave the nested loop. Every character in the
- // query can match at most one character in the term.
- termCharacterMatchFound = true;
- }
- }
- }
-
- return score;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
new file mode 100644
index 0000000..3e72d05
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import java.util.Locale;
+
+/**
+ * A matching algorithm that is similar to the searching algorithms implemented in editors such
+ * as Sublime Text, TextMate, Atom and others.
+ *
+ * <p>
+ * One point is given for every matched character. A match that directly follows the previous match in the term yields two bonus points. A higher score
+ * indicates a higher similarity.
+ * </p>
+ *
+ * <p>
+ * This code has been adapted from Apache Commons Lang 3.3.
+ * </p>
+ */
+public class FuzzyScore implements StringMetric<Integer> {
+
+ /**
+ * <p>
+ * Find the Fuzzy Score which indicates the similarity score between two
+ * Strings. This method uses the default locale.
+ * </p>
+ *
+ * @param term a full term that should be matched against, must not be null
+ * @param query the query that will be matched against a term, must not be
+ * null
+ * @return result score
+ * @throws IllegalArgumentException if either String input is {@code null}
+ */
+ @Override
+ public Integer compare(CharSequence term, CharSequence query) {
+ return compare(term, query, Locale.getDefault());
+ }
+
+ /**
+ * <p>
+ * Find the Fuzzy Score which indicates the similarity score between two
+ * Strings.
+ * </p>
+ *
+ * <pre>
+ * score.compare(null, null, null) = IllegalArgumentException
+ * score.compare("", "", Locale.ENGLISH) = 0
+ * score.compare("Workshop", "b", Locale.ENGLISH) = 0
+ * score.compare("Room", "o", Locale.ENGLISH) = 1
+ * score.compare("Workshop", "w", Locale.ENGLISH) = 1
+ * score.compare("Workshop", "ws", Locale.ENGLISH) = 2
+ * score.compare("Workshop", "wo", Locale.ENGLISH) = 4
+ * score.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+ * </pre>
+ *
+ * @param term a full term that should be matched against, must not be null
+ * @param query the query that will be matched against a term, must not be
+ * null
+ * @param locale This string matching logic is case insensitive. A locale is
+ * necessary to normalize both Strings to lower case.
+ * @return result score
+ * @throws IllegalArgumentException if either String input is {@code null} or
+ * the Locale input is {@code null}
+ */
+ public Integer compare(CharSequence term, CharSequence query, Locale locale) {
+ if (term == null || query == null) {
+ throw new IllegalArgumentException("Strings must not be null");
+ } else if (locale == null) {
+ throw new IllegalArgumentException("Locale must not be null");
+ }
+
+ // fuzzy logic is case insensitive. We normalize the Strings to lower
+ // case right from the start. Turning characters to lower case
+ // via Character.toLowerCase(char) is unfortunately insufficient
+ // as it does not accept a locale.
+ final String termLowerCase = term.toString().toLowerCase(locale);
+ final String queryLowerCase = query.toString().toLowerCase(locale);
+
+ // the resulting score
+ int score = 0;
+
+ // the position in the term which will be scanned next for potential
+ // query character matches
+ int termIndex = 0;
+
+ // index of the previously matched character in the term
+ int previousMatchingCharacterIndex = Integer.MIN_VALUE;
+
+ for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) {
+ final char queryChar = queryLowerCase.charAt(queryIndex);
+
+ boolean termCharacterMatchFound = false;
+ for (; termIndex < termLowerCase.length()
+ && !termCharacterMatchFound; termIndex++) {
+ final char termChar = termLowerCase.charAt(termIndex);
+
+ if (queryChar == termChar) {
+ // simple character matches result in one point
+ score++;
+
+ // subsequent character matches further improve
+ // the score.
+ if (previousMatchingCharacterIndex + 1 == termIndex) {
+ score += 2;
+ }
+
+ previousMatchingCharacterIndex = termIndex;
+
+ // we can leave the nested loop. Every character in the
+ // query can match at most one character in the term.
+ termCharacterMatchFound = true;
+ }
+ }
+ }
+
+ return score;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
deleted file mode 100644
index 49e51ba..0000000
--- a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Locale;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Unit tests for {@link org.apache.commons.text.FuzzyDistance}.
- */
-public class FuzzyDistanceTest {
-
- private static FuzzyDistance distance;
-
- @BeforeClass
- public static void setUp() {
- distance = new FuzzyDistance();
- }
-
- @Test
- public void testGetFuzzyDistance() throws Exception {
- assertEquals(0, (int) distance.compare("", "", Locale.ENGLISH));
- assertEquals(0,
- (int) distance.compare("Workshop", "b", Locale.ENGLISH));
- assertEquals(1,
- (int) distance.compare("Room", "o", Locale.ENGLISH));
- assertEquals(1,
- (int) distance.compare("Workshop", "w", Locale.ENGLISH));
- assertEquals(2,
- (int) distance.compare("Workshop", "ws", Locale.ENGLISH));
- assertEquals(4,
- (int) distance.compare("Workshop", "wo", Locale.ENGLISH));
- assertEquals(3, (int) distance.compare(
- "Apache Software Foundation", "asf", Locale.ENGLISH));
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testGetFuzzyDistance_NullNullNull() throws Exception {
- distance.compare(null, null, null);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testGetFuzzyDistance_StringNullLoclae() throws Exception {
- distance.compare(" ", null, Locale.ENGLISH);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testGetFuzzyDistance_NullStringLocale() throws Exception {
- distance.compare(null, "clear", Locale.ENGLISH);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testGetFuzzyDistance_StringStringNull() throws Exception {
- distance.compare(" ", "clear", null);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
new file mode 100644
index 0000000..b2fab14
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Locale;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.FuzzyScore}.
+ */
+public class FuzzyScoreTest {
+
+ private static FuzzyScore score;
+
+ @BeforeClass
+ public static void setUp() {
+ score = new FuzzyScore();
+ }
+
+ @Test
+ public void testGetFuzzyScore() throws Exception {
+ assertEquals(0, (int) score.compare("", "", Locale.ENGLISH));
+ assertEquals(0,
+ (int) score.compare("Workshop", "b", Locale.ENGLISH));
+ assertEquals(1,
+ (int) score.compare("Room", "o", Locale.ENGLISH));
+ assertEquals(1,
+ (int) score.compare("Workshop", "w", Locale.ENGLISH));
+ assertEquals(2,
+ (int) score.compare("Workshop", "ws", Locale.ENGLISH));
+ assertEquals(4,
+ (int) score.compare("Workshop", "wo", Locale.ENGLISH));
+ assertEquals(3, (int) score.compare(
+ "Apache Software Foundation", "asf", Locale.ENGLISH));
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetFuzzyScore_NullNullNull() throws Exception {
+ score.compare(null, null, null);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetFuzzyScore_StringNullLoclae() throws Exception {
+ score.compare(" ", null, Locale.ENGLISH);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetFuzzyScore_NullStringLocale() throws Exception {
+ score.compare(null, "clear", Locale.ENGLISH);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetFuzzyScore_StringStringNull() throws Exception {
+ score.compare(" ", "clear", null);
+ }
+
+}