You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2014/12/13 04:12:18 UTC

[text] SANDBOX-485 Add Hamming distance

Repository: commons-text
Updated Branches:
  refs/heads/master 413aeeb1a -> 87b789fbe


SANDBOX-485 Add Hamming distance


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/87b789fb
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/87b789fb
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/87b789fb

Branch: refs/heads/master
Commit: 87b789fbec0360a7e2c54f7cb45225e238530ee8
Parents: 413aeeb
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sat Dec 13 01:09:40 2014 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sat Dec 13 01:12:01 2014 -0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |  1 +
 .../text/similarity/HammingDistance.java        | 77 ++++++++++++++++++++
 .../text/similarity/HammingDistanceTest.java    | 58 +++++++++++++++
 3 files changed, 136 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 416f579..d8c3fdf 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
   <body>
 
   <release version="1.0" date="tba" description="tba">
+    <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming distance</action>
   </release>
 
   </body>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
new file mode 100644
index 0000000..13621b4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * <p>
+ * The hamming distance between two strings of equal length is the number of
+ * positions at which the corresponding symbols are different.
+ * </p>
+ *
+ * <p>
+ * For further explanation about the hamming distance, take a look at its
+ * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
+ * </p>
+ *
+ * @since 1.0
+ */
+public class HammingDistance implements StringMetric<Integer> {
+
+    /**
+     * <p>Find the hamming distance between two strings with the same
+     * length.</p>
+     *
+     * <p>The distance starts with zero, and for each occurrence of a
+     * different character in either String, it increments the distance
+     * by 1, and finally return its value.</p>
+     *
+     * <pre>
+     * distance.compare("", "")               = 0
+     * distance.compare("pappa", "pappa")     = 0
+     * distance.compare("1011101", "1011111") = 1
+     * distance.compare("ATCG", "ACCC")       = 2
+     * distance.compare("karolin", "kerstin"  = 3
+     * </pre>
+     *
+     * @param left the first string, must not be null
+     * @param right the second string, must not be null
+     * @return distance
+     * @throws IllegalArgumentException if either String input {@code null} or
+     *             if they do not have the same length
+     */
+    @Override
+    public Integer compare(CharSequence left, CharSequence right) {
+        if (left == null || right == null) {
+            throw new IllegalArgumentException("Strings must not be null");
+        }
+
+        if (left.length() != right.length()) {
+            throw new IllegalArgumentException("Strings must have the same length");
+        }
+
+        int distance = 0;
+
+        for (int i = 0; i < left.length(); i++) {
+            if (left.charAt(i) != right.charAt(i)) {
+                distance++;
+            }
+        }
+
+        return distance;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
new file mode 100644
index 0000000..aac2fa3
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.HammingDistance}.
+ */
+public class HammingDistanceTest {
+
+    private static HammingDistance distance;
+
+    @BeforeClass
+    public static void setUp() {
+        distance = new HammingDistance();
+    }
+
+    @Test
+    public void testHammingDistance() {
+        assertEquals(Integer.valueOf(0), distance.compare("", ""));
+        assertEquals(Integer.valueOf(0), distance.compare("pappa", "pappa"));
+        assertEquals(Integer.valueOf(1), distance.compare("papaa", "pappa"));
+        assertEquals(Integer.valueOf(3), distance.compare("karolin", "kathrin"));
+        assertEquals(Integer.valueOf(3), distance.compare("karolin", "kerstin"));
+        assertEquals(Integer.valueOf(2), distance.compare("1011101", "1001001"));
+        assertEquals(Integer.valueOf(3), distance.compare("2173896", "2233796"));
+        assertEquals(Integer.valueOf(2), distance.compare("ATCG", "ACCC"));
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testHammingDistance_nullLeftValue() {
+        distance.compare(null, "");
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testHammingDistance_nullRightValue() {
+        distance.compare("", null);
+    }
+
+}