You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/14 16:57:02 UTC

[text] SANDBOX-488 rename FuzzyDistance to FuzzyScore

Repository: commons-text
Updated Branches:
  refs/heads/master 9dd58bce9 -> 1e7d2aa50


SANDBOX-488 rename FuzzyDistance to FuzzyScore


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/1e7d2aa5
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/1e7d2aa5
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/1e7d2aa5

Branch: refs/heads/master
Commit: 1e7d2aa5057ad5e067ec9cd762ab8772546bc777
Parents: 9dd58bc
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sat Feb 14 13:56:55 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sat Feb 14 13:56:55 2015 -0200

----------------------------------------------------------------------
 .../commons/text/similarity/FuzzyDistance.java  | 133 -------------------
 .../commons/text/similarity/FuzzyScore.java     | 133 +++++++++++++++++++
 .../text/similarity/FuzzyDistanceTest.java      |  75 -----------
 .../commons/text/similarity/FuzzyScoreTest.java |  75 +++++++++++
 4 files changed, 208 insertions(+), 208 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
deleted file mode 100644
index 4d175a0..0000000
--- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import java.util.Locale;
-
-/**
- * A matching algorithm that is similar to the searching algorithms implemented in editors such
- * as Sublime Text, TextMate, Atom and others.
- *
- * <p>
- * One point is given for every matched character. A match directly following the previous match in the term
- * yields two bonus points. A higher score indicates a higher similarity.
- * </p>
- *
- * <p>
- * This code has been adapted from Apache Commons Lang 3.3.
- * </p>
- */
-public class FuzzyDistance implements StringMetric<Integer> {
-
-    /**
-     * <p>
-     * Finds the Fuzzy Distance, which indicates the similarity score between two
-     * Strings. This method uses the default locale.
-     * </p>
-     *
-     * @param term a full term that should be matched against, must not be null
-     * @param query the query that will be matched against a term, must not be
-     *            null
-     * @return the resulting score
-     * @throws IllegalArgumentException if either String input is {@code null}
-     */
-    @Override
-    public Integer compare(CharSequence term, CharSequence query) {
-        return compare(term, query, Locale.getDefault());
-    }
-
-    /**
-     * <p>
-     * Finds the Fuzzy Distance, which indicates the similarity score between two
-     * Strings, using the given locale for case normalization.
-     * </p>
-     *
-     * <pre>
-     * distance.compare(null, null, null)                                    = IllegalArgumentException
-     * distance.compare("", "", Locale.ENGLISH)                              = 0
-     * distance.compare("Workshop", "b", Locale.ENGLISH)                     = 0
-     * distance.compare("Room", "o", Locale.ENGLISH)                         = 1
-     * distance.compare("Workshop", "w", Locale.ENGLISH)                     = 1
-     * distance.compare("Workshop", "ws", Locale.ENGLISH)                    = 2
-     * distance.compare("Workshop", "wo", Locale.ENGLISH)                    = 4
-     * distance.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
-     * </pre>
-     *
-     * @param term a full term that should be matched against, must not be null
-     * @param query the query that will be matched against a term, must not be
-     *            null
-     * @param locale This string matching logic is case insensitive. A locale is
-     *            necessary to normalize both Strings to lower case.
-     * @return the resulting score
-     * @throws IllegalArgumentException if either String input is {@code null} or
-     *             the Locale input is {@code null}
-     */
-    public Integer compare(CharSequence term, CharSequence query, Locale locale) {
-        if (term == null || query == null) {
-            throw new IllegalArgumentException("Strings must not be null");
-        } else if (locale == null) {
-            throw new IllegalArgumentException("Locale must not be null");
-        }
-
-        // fuzzy logic is case insensitive. We normalize the Strings to lower
-        // case right from the start. Turning characters to lower case
-        // via Character.toLowerCase(char) is unfortunately insufficient
-        // as it does not accept a locale.
-        final String termLowerCase = term.toString().toLowerCase(locale);
-        final String queryLowerCase = query.toString().toLowerCase(locale);
-
-        // the resulting score
-        int score = 0;
-
-        // the position in the term which will be scanned next for potential
-        // query character matches
-        int termIndex = 0;
-
-        // index of the previously matched character in the term; MIN_VALUE means "no match yet"
-        int previousMatchingCharacterIndex = Integer.MIN_VALUE;
-
-        for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) {
-            final char queryChar = queryLowerCase.charAt(queryIndex);
-
-            boolean termCharacterMatchFound = false;
-            for (; termIndex < termLowerCase.length()
-                    && !termCharacterMatchFound; termIndex++) {
-                final char termChar = termLowerCase.charAt(termIndex);
-
-                if (queryChar == termChar) {
-                    // simple character matches result in one point
-                    score++;
-
-                    // a match directly following the previous match
-                    // in the term earns two bonus points.
-                    if (previousMatchingCharacterIndex + 1 == termIndex) {
-                        score += 2;
-                    }
-
-                    previousMatchingCharacterIndex = termIndex;
-
-                    // we can leave the nested loop. Every character in the
-                    // query can match at most one character in the term.
-                    termCharacterMatchFound = true;
-                }
-            }
-        }
-
-        return score;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
new file mode 100644
index 0000000..3e72d05
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import java.util.Locale;
+
+/**
+ * A matching algorithm that is similar to the searching algorithms implemented in editors such
+ * as Sublime Text, TextMate, Atom and others.
+ *
+ * <p>
+ * One point is given for every matched character. A match directly following the previous match in the term
+ * yields two bonus points. A higher score indicates a higher similarity.
+ * </p>
+ *
+ * <p>
+ * This code has been adapted from Apache Commons Lang 3.3.
+ * </p>
+ */
+public class FuzzyScore implements StringMetric<Integer> {
+
+    /**
+     * <p>
+     * Finds the Fuzzy Score, which indicates the similarity score between two
+     * Strings. This method uses the default locale.
+     * </p>
+     *
+     * @param term a full term that should be matched against, must not be null
+     * @param query the query that will be matched against a term, must not be
+     *            null
+     * @return the resulting score
+     * @throws IllegalArgumentException if either String input is {@code null}
+     */
+    @Override
+    public Integer compare(CharSequence term, CharSequence query) {
+        return compare(term, query, Locale.getDefault());
+    }
+
+    /**
+     * <p>
+     * Finds the Fuzzy Score, which indicates the similarity score between two
+     * Strings, using the given locale for case normalization.
+     * </p>
+     *
+     * <pre>
+     * score.compare(null, null, null)                                    = IllegalArgumentException
+     * score.compare("", "", Locale.ENGLISH)                              = 0
+     * score.compare("Workshop", "b", Locale.ENGLISH)                     = 0
+     * score.compare("Room", "o", Locale.ENGLISH)                         = 1
+     * score.compare("Workshop", "w", Locale.ENGLISH)                     = 1
+     * score.compare("Workshop", "ws", Locale.ENGLISH)                    = 2
+     * score.compare("Workshop", "wo", Locale.ENGLISH)                    = 4
+     * score.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+     * </pre>
+     *
+     * @param term a full term that should be matched against, must not be null
+     * @param query the query that will be matched against a term, must not be
+     *            null
+     * @param locale This string matching logic is case insensitive. A locale is
+     *            necessary to normalize both Strings to lower case.
+     * @return the resulting score
+     * @throws IllegalArgumentException if either String input is {@code null} or
+     *             the Locale input is {@code null}
+     */
+    public Integer compare(CharSequence term, CharSequence query, Locale locale) {
+        if (term == null || query == null) {
+            throw new IllegalArgumentException("Strings must not be null");
+        } else if (locale == null) {
+            throw new IllegalArgumentException("Locale must not be null");
+        }
+
+        // fuzzy logic is case insensitive. We normalize the Strings to lower
+        // case right from the start. Turning characters to lower case
+        // via Character.toLowerCase(char) is unfortunately insufficient
+        // as it does not accept a locale.
+        final String termLowerCase = term.toString().toLowerCase(locale);
+        final String queryLowerCase = query.toString().toLowerCase(locale);
+
+        // the resulting score
+        int score = 0;
+
+        // the position in the term which will be scanned next for potential
+        // query character matches
+        int termIndex = 0;
+
+        // index of the previously matched character in the term; MIN_VALUE means "no match yet"
+        int previousMatchingCharacterIndex = Integer.MIN_VALUE;
+
+        for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) {
+            final char queryChar = queryLowerCase.charAt(queryIndex);
+
+            boolean termCharacterMatchFound = false;
+            for (; termIndex < termLowerCase.length()
+                    && !termCharacterMatchFound; termIndex++) {
+                final char termChar = termLowerCase.charAt(termIndex);
+
+                if (queryChar == termChar) {
+                    // simple character matches result in one point
+                    score++;
+
+                    // a match directly following the previous match
+                    // in the term earns two bonus points.
+                    if (previousMatchingCharacterIndex + 1 == termIndex) {
+                        score += 2;
+                    }
+
+                    previousMatchingCharacterIndex = termIndex;
+
+                    // we can leave the nested loop. Every character in the
+                    // query can match at most one character in the term.
+                    termCharacterMatchFound = true;
+                }
+            }
+        }
+
+        return score;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
deleted file mode 100644
index 49e51ba..0000000
--- a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Locale;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Unit tests for {@link org.apache.commons.text.similarity.FuzzyDistance}.
- */
-public class FuzzyDistanceTest {
-
-    private static FuzzyDistance distance;
-
-    @BeforeClass
-    public static void setUp() {
-        distance = new FuzzyDistance();
-    }
-
-    @Test
-    public void testGetFuzzyDistance() throws Exception {
-        assertEquals(0, (int) distance.compare("", "", Locale.ENGLISH));
-        assertEquals(0,
-                (int) distance.compare("Workshop", "b", Locale.ENGLISH));
-        assertEquals(1,
-                (int) distance.compare("Room", "o", Locale.ENGLISH));
-        assertEquals(1,
-                (int) distance.compare("Workshop", "w", Locale.ENGLISH));
-        assertEquals(2,
-                (int) distance.compare("Workshop", "ws", Locale.ENGLISH));
-        assertEquals(4,
-                (int) distance.compare("Workshop", "wo", Locale.ENGLISH));
-        assertEquals(3, (int) distance.compare(
-                "Apache Software Foundation", "asf", Locale.ENGLISH));
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testGetFuzzyDistance_NullNullNull() throws Exception {
-        distance.compare(null, null, null);
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testGetFuzzyDistance_StringNullLoclae() throws Exception {
-        distance.compare(" ", null, Locale.ENGLISH);
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testGetFuzzyDistance_NullStringLocale() throws Exception {
-        distance.compare(null, "clear", Locale.ENGLISH);
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testGetFuzzyDistance_StringStringNull() throws Exception {
-        distance.compare(" ", "clear", null);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
new file mode 100644
index 0000000..b2fab14
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Locale;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.FuzzyScore}.
+ */
+public class FuzzyScoreTest {
+
+    private static FuzzyScore score;
+
+    @BeforeClass
+    public static void setUp() {
+        score = new FuzzyScore();
+    }
+
+    @Test
+    public void testGetFuzzyScore() throws Exception {
+        assertEquals(0, (int) score.compare("", "", Locale.ENGLISH));
+        assertEquals(0,
+                (int) score.compare("Workshop", "b", Locale.ENGLISH));
+        assertEquals(1,
+                (int) score.compare("Room", "o", Locale.ENGLISH));
+        assertEquals(1,
+                (int) score.compare("Workshop", "w", Locale.ENGLISH));
+        assertEquals(2,
+                (int) score.compare("Workshop", "ws", Locale.ENGLISH));
+        assertEquals(4,
+                (int) score.compare("Workshop", "wo", Locale.ENGLISH));
+        assertEquals(3, (int) score.compare(
+                "Apache Software Foundation", "asf", Locale.ENGLISH));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetFuzzyScore_NullNullNull() throws Exception {
+        score.compare(null, null, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetFuzzyScore_StringNullLoclae() throws Exception {
+        score.compare(" ", null, Locale.ENGLISH);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetFuzzyScore_NullStringLocale() throws Exception {
+        score.compare(null, "clear", Locale.ENGLISH);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetFuzzyScore_StringStringNull() throws Exception {
+        score.compare(" ", "clear", null);
+    }
+
+}