You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2014/12/13 04:12:18 UTC
[text] SANDBOX-485 Add Hamming distance
Repository: commons-text
Updated Branches:
refs/heads/master 413aeeb1a -> 87b789fbe
SANDBOX-485 Add Hamming distance
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/87b789fb
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/87b789fb
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/87b789fb
Branch: refs/heads/master
Commit: 87b789fbec0360a7e2c54f7cb45225e238530ee8
Parents: 413aeeb
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sat Dec 13 01:09:40 2014 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sat Dec 13 01:12:01 2014 -0200
----------------------------------------------------------------------
src/changes/changes.xml | 1 +
.../text/similarity/HammingDistance.java | 77 ++++++++++++++++++++
.../text/similarity/HammingDistanceTest.java | 58 +++++++++++++++
3 files changed, 136 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 416f579..d8c3fdf 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
<body>
<release version="1.0" date="tba" description="tba">
+ <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming distance</action>
</release>
</body>
http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
new file mode 100644
index 0000000..13621b4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * <p>
+ * The hamming distance between two strings of equal length is the number of
+ * positions at which the corresponding symbols are different.
+ * </p>
+ *
+ * <p>
+ * For further explanation about the hamming distance, take a look at its
+ * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
+ * </p>
+ *
+ * @since 1.0
+ */
+public class HammingDistance implements StringMetric<Integer> {
+
+ /**
+ * <p>Find the hamming distance between two strings with the same
+ * length.</p>
+ *
+ * <p>The distance starts with zero, and for each occurrence of a
+ * different character in either String, it increments the distance
+ * by 1, and finally return its value.</p>
+ *
+ * <pre>
+ * distance.compare("", "") = 0
+ * distance.compare("pappa", "pappa") = 0
+ * distance.compare("1011101", "1011111") = 1
+ * distance.compare("ATCG", "ACCC") = 2
+ * distance.compare("karolin", "kerstin" = 3
+ * </pre>
+ *
+ * @param left the first string, must not be null
+ * @param right the second string, must not be null
+ * @return distance
+ * @throws IllegalArgumentException if either String input {@code null} or
+ * if they do not have the same length
+ */
+ @Override
+ public Integer compare(CharSequence left, CharSequence right) {
+ if (left == null || right == null) {
+ throw new IllegalArgumentException("Strings must not be null");
+ }
+
+ if (left.length() != right.length()) {
+ throw new IllegalArgumentException("Strings must have the same length");
+ }
+
+ int distance = 0;
+
+ for (int i = 0; i < left.length(); i++) {
+ if (left.charAt(i) != right.charAt(i)) {
+ distance++;
+ }
+ }
+
+ return distance;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
new file mode 100644
index 0000000..aac2fa3
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.HammingDistance}.
+ */
+public class HammingDistanceTest {
+
+ private static HammingDistance distance;
+
+ @BeforeClass
+ public static void setUp() {
+ distance = new HammingDistance();
+ }
+
+ @Test
+ public void testHammingDistance() {
+ assertEquals(Integer.valueOf(0), distance.compare("", ""));
+ assertEquals(Integer.valueOf(0), distance.compare("pappa", "pappa"));
+ assertEquals(Integer.valueOf(1), distance.compare("papaa", "pappa"));
+ assertEquals(Integer.valueOf(3), distance.compare("karolin", "kathrin"));
+ assertEquals(Integer.valueOf(3), distance.compare("karolin", "kerstin"));
+ assertEquals(Integer.valueOf(2), distance.compare("1011101", "1001001"));
+ assertEquals(Integer.valueOf(3), distance.compare("2173896", "2233796"));
+ assertEquals(Integer.valueOf(2), distance.compare("ATCG", "ACCC"));
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testHammingDistance_nullLeftValue() {
+ distance.compare(null, "");
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testHammingDistance_nullRightValue() {
+ distance.compare("", null);
+ }
+
+}