You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/04/20 04:59:01 UTC

[04/13] [text] Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.

Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/685f9a86
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/685f9a86
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/685f9a86

Branch: refs/heads/master
Commit: 685f9a864d46cc526b14e3a7476465c49d991478
Parents: 9a0cc85
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:22:45 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:22:45 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     |  36 ++---
 .../org/apache/commons/text/names/Name.java     | 141 ++++++-------------
 .../apache/commons/text/names/NameString.java   | 134 ++++++++++++++++++
 .../commons/text/names/HumanNameParserTest.java |  24 ++--
 .../commons/text/names/NameStringTest.java      | 104 ++++++++++++++
 .../org/apache/commons/text/names/NameTest.java | 104 --------------
 6 files changed, 315 insertions(+), 228 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index fa2433a..df8e55c 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -195,14 +195,14 @@ public class HumanNameParser {
     /**
      * Consumes the string and creates the name parts.
      *
-     * @param nameStr the name to parse. Must not be null.
+     * @param name the name to parse. Must not be null.
      * @throws NameParseException if the parser fails to retrieve the name parts.
-     * @throws NullPointerException if nameStr is null.
+     * @throws NullPointerException if name is null.
      */
-    public void parse(String nameStr) {
-        Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+    public Name parse(String name) {
+        Objects.requireNonNull(name, "Parameter 'name' must not be null.");
 
-        Name name = new Name(nameStr);
+        NameString nameString = new NameString(name);
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 
@@ -218,28 +218,30 @@ public class HumanNameParser {
         String firstRegex = "(?i)^([^ ]+)";
 
         // get nickname, if there is one
-        this.nickname = name.chopWithRegex(nicknamesRegex, 2);
+        this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
 
         // get suffix, if there is one
-        this.suffix = name.chopWithRegex(suffixRegex, 1);
+        this.suffix = nameString.chopWithRegex(suffixRegex, 1);
 
-        // flip the before-comma and after-comma parts of the name
-        name.flip(",");
+        // flip the before-comma and after-comma parts of the nameString
+        nameString.flip(",");
 
-        // get the last name
-        this.last = name.chopWithRegex(lastRegex, 0);
+        // get the last name
+        this.last = nameString.chopWithRegex(lastRegex, 0);
 
         // get the first initial, if there is one
-        this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
+        this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
 
-        // get the first name
-        this.first = name.chopWithRegex(firstRegex, 0);
+        // get the first name
+        this.first = nameString.chopWithRegex(firstRegex, 0);
         if (StringUtils.isBlank(this.first)) {
-            throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
+            throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
         }
 
-        // if anything's left, that's the middle name
-        this.middle = name.getStr();
+        // if anything's left, that's the middle name
+        this.middle = nameString.getStr();
+        
+        return new Name(leadingInit, first, nickname, middle, last, suffix);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 0dd2560..3067ba5 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -16,119 +16,70 @@
  */
 package org.apache.commons.text.names;
 
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import java.util.Objects;
 
 /**
- * <p>A {@code Name} object that encapsulates a name string, and contains the logic
- * for handling with Regexes.</p>
+ * An object representing the result of parsing a Name.
  *
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
  */
-public class Name {
+public final class Name {
 
-    /**
-     * Encapsulated string. Not immutable!
-     */
-    private String str;
+    private final String leadingInitial;
+    private final String firstName;
+    private final String nickName;
+    private final String middleName;
+    private final String lastName;
+    private final String suffix;
 
-    /**
-     * Creates a new Name object.
-     *
-     * @param str encapsulated string.
-     */
-    public Name(String str) {
-        this.str = str;
+    Name(String leadingInitial, String firstName, String nickName, String middleName, String lastName, String suffix) {
+        this.leadingInitial = leadingInitial;
+        this.firstName = firstName;
+        this.nickName = nickName;
+        this.middleName = middleName;
+        this.lastName = lastName;
+        this.suffix = suffix;
     }
 
-    /**
-     * Gets the encapsulated string.
-     *
-     * @return encapsulated string
-     */
-    public String getStr() {
-        return str;
+    public String getLeadingInitial() {
+        return leadingInitial;
     }
 
-    /**
-     * Sets the encapsulated string value.
-     *
-     * @param str string value
-     */
-    public void setStr(String str) {
-        this.str = str;
-        this.norm();
+    public String getFirstName() {
+        return firstName;
     }
 
-    /**
-     * Uses a regex to chop off and return part of the namestring.
-     * There are two parts: first, it returns the matched substring,
-     * and then it removes that substring from the encapsulated
-     * string and normalizes it.
-     *
-     * @param regex matches the part of the namestring to chop off
-     * @param submatchIndex which of the parenthesized submatches to use
-     * @return the part of the namestring that got chopped off
-     */
-    public String chopWithRegex(String regex, int submatchIndex) {
-        String chopped = "";
-        Pattern pattern = Pattern.compile(regex);
-        Matcher matcher = pattern.matcher(this.str);
+    public String getNickName() {
+        return nickName;
+    }
 
-        // workdaround for numReplacements in Java
-        int numReplacements = 0;
-        while (matcher.find()) {
-            numReplacements++;
-        }
+    public String getMiddleName() {
+        return middleName;
+    }
 
-        // recreate or the groups are gone
-        pattern = Pattern.compile(regex);
-        matcher = pattern.matcher(this.str);
-        if (matcher.find()) {
-            boolean subset = matcher.groupCount() > submatchIndex;
-            if (subset) {
-                this.str = this.str.replaceAll(regex, " ");
-                if (numReplacements > 1) {
-                    throw new NameParseException("The regex being used to find the name has multiple matches.");
-                }
-                this.norm();
-                return matcher.group(submatchIndex).trim();
-            }
-        }
-        return chopped;
+    public String getLastName() {
+        return lastName;
     }
 
-    /**
-     * Flips the front and back parts of a name with one another.
-     * Front and back are determined by a specified character somewhere in the
-     * middle of the string.
-     *
-     * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
-     * @throws NameParseException if a regex fails or a condition is not expected
-     */
-    public void flip(String flipAroundChar) {
-        String[] parts = this.str.split(flipAroundChar);
-        if (parts != null) {
-            if (parts.length == 2) {
-                this.str = String.format("%s %s", parts[1], parts[0]);
-                this.norm();
-            } else if (parts.length > 2) {
-                throw new NameParseException(
-                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
-            }
-        }
+    public String getSuffix() {
+        return suffix;
     }
 
-    /**
-     * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
-     *
-     * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
-     * whitespace chars to " ".</p>
-     */
-    public void norm() {
-        this.str = this.str.trim();
-        this.str = this.str.replaceAll("\\s+", " ");
-        this.str = this.str.replaceAll(",$", " ");
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        Name name = (Name) o;
+        return Objects.equals(leadingInitial, name.leadingInitial) &&
+                Objects.equals(firstName, name.firstName) &&
+                Objects.equals(nickName, name.nickName) &&
+                Objects.equals(middleName, name.middleName) &&
+                Objects.equals(lastName, name.lastName) &&
+                Objects.equals(suffix, name.suffix);
     }
 
+    @Override
+    public int hashCode() {
+        return Objects.hash(leadingInitial, firstName, nickName, middleName, lastName, suffix);
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
new file mode 100644
index 0000000..8f606f2
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A wrapper around a String representing a name to parse. Contains the logic
+ * for executing regexes on the wrapped name string.
+ *
+ * <p>This class is not thread-safe.</p>
+ */
+final class NameString {
+
+    /**
+     * Encapsulated string. Not immutable!
+     */
+    private String str;
+
+    /**
+     * Creates a new NameString object.
+     *
+     * @param str encapsulated string.
+     */
+    public NameString(String str) {
+        this.str = str;
+    }
+
+    /**
+     * Gets the encapsulated string.
+     *
+     * @return encapsulated string
+     */
+    public String getStr() {
+        return str;
+    }
+
+    /**
+     * Sets the encapsulated string value.
+     *
+     * @param str string value
+     */
+    public void setStr(String str) {
+        this.str = str;
+        this.norm();
+    }
+
+    /**
+     * Uses a regex to chop off and return part of the namestring.
+     * There are two parts: first, it returns the matched substring,
+     * and then it removes that substring from the encapsulated
+     * string and normalizes it.
+     *
+     * @param regex matches the part of the namestring to chop off
+     * @param submatchIndex which of the parenthesized submatches to use
+     * @return the part of the namestring that got chopped off
+     */
+    public String chopWithRegex(String regex, int submatchIndex) {
+        String chopped = "";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(this.str);
+
+        // workaround: replaceAll does not report a replacement count, so count the matches up front
+        int numReplacements = 0;
+        while (matcher.find()) {
+            numReplacements++;
+        }
+
+        // recreate or the groups are gone
+        pattern = Pattern.compile(regex);
+        matcher = pattern.matcher(this.str);
+        if (matcher.find()) {
+            boolean subset = matcher.groupCount() > submatchIndex;
+            if (subset) {
+                this.str = this.str.replaceAll(regex, " ");
+                if (numReplacements > 1) {
+                    throw new NameParseException("The regex being used to find the name has multiple matches.");
+                }
+                this.norm();
+                return matcher.group(submatchIndex).trim();
+            }
+        }
+        return chopped;
+    }
+
+    /**
+     * Flips the front and back parts of a name with one another.
+     * Front and back are determined by a specified character somewhere in the
+     * middle of the string.
+     *
+     * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
+     * @throws NameParseException if splitting on {@code flipAroundChar} yields more than two parts
+     */
+    public void flip(String flipAroundChar) {
+        String[] parts = this.str.split(flipAroundChar);
+        if (parts != null) {
+            if (parts.length == 2) {
+                this.str = String.format("%s %s", parts[1], parts[0]);
+                this.norm();
+            } else if (parts.length > 2) {
+                throw new NameParseException(
+                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+            }
+        }
+    }
+
+    /**
+     * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
+     *
+     * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
+     * whitespace chars to " ".</p>
+     */
+    public void norm() {
+        this.str = this.str.trim();
+        this.str = this.str.replaceAll("\\s+", " ");
+        this.str = this.str.replaceAll(",$", " ");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d43d2be..d059ed4 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,26 +71,26 @@ public class HumanNameParserTest {
      */
     private void validateRecord(CSVRecord record) {
         HumanNameParser parser = new HumanNameParser();
-        parser.parse(record.get(Colums.Name));
+        Name result = parser.parse(record.get(Colums.Name));
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,
-                parser.getLeadingInit(), equalTo(record.get(Colums.LeadingInit)));
-        
+                result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+
         assertThat("Wrong FirstName in record " + recordNum,
-                parser.getFirst(), equalTo(record.get(Colums.FirstName)));
-        
+                result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+
         assertThat("Wrong NickName in record " + recordNum,
-                parser.getNickname(), equalTo(record.get(Colums.NickName)));
-        
+                result.getNickName(), equalTo(record.get(Colums.NickName)));
+
         assertThat("Wrong MiddleName in record " + recordNum,
-                parser.getMiddle(), equalTo(record.get(Colums.MiddleName)));
-        
+                result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+
         assertThat("Wrong LastName in record " + recordNum,
-                parser.getLast(), equalTo(record.get(Colums.LastName)));
-        
+                result.getLastName(), equalTo(record.get(Colums.LastName)));
+
         assertThat("Wrong Suffix in record " + recordNum,
-                parser.getSuffix(), equalTo(record.get(Colums.Suffix)));
+                result.getSuffix(), equalTo(record.get(Colums.Suffix)));
     }
 
     private enum Colums {

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
new file mode 100644
index 0000000..494c70b
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code NameString} and {@code HumanNameParser}. Utilizes the same
+ * input file as the PHP library 0.2 version.
+ */
+public class NameStringTest {
+
+    private NameString nameString;
+
+    @Before
+    public void setUp() {
+        nameString = new NameString("Björn O'Malley");
+    }
+
+    @Test
+    public void testSetStrRemovesWhitespaceAtEnds() {
+        nameString.setStr("    Björn O'Malley \r\n");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testSetStrRemovesRedudentantWhitespace(){
+        nameString.setStr(" Björn    O'Malley");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexReturnsChoppedSubstring(){
+        nameString.setStr("Björn O'Malley");
+        assertEquals(
+            "Björn",
+            nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsStartOffNameStr(){
+        nameString.setStr("Björn O'Malley");
+        nameString.chopWithRegex("(^[^ ]+)", 0);
+        assertEquals(
+                "O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsEndOffNameStr(){
+        nameString.setStr("Björn O'Malley");
+        nameString.chopWithRegex("( (.+)$)", 1);
+        assertEquals(
+            "Björn",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsMiddleFromNameStr(){
+        nameString.setStr("Björn 'Bill' O'Malley");
+        nameString.chopWithRegex("( '[^']+' )", 0);
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testFlip() {
+        nameString.setStr("O'Malley, Björn");
+        nameString.flip(",");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
deleted file mode 100644
index 7822e92..0000000
--- a/src/test/java/org/apache/commons/text/names/NameTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.names;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
- * input file as the PHP library 0.2 version.
- */
-public class NameTest {
-
-    protected Name object;
-
-    @Before
-    public void setUp() {
-        object = new Name("Björn O'Malley");
-    }
-
-    @Test
-    public void testSetStrRemovesWhitespaceAtEnds() {
-        object.setStr("    Björn O'Malley \r\n");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testSetStrRemovesRedudentantWhitespace(){
-        object.setStr(" Björn    O'Malley");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexReturnsChoppedSubstring(){
-        object.setStr("Björn O'Malley");
-        assertEquals(
-            "Björn",
-            object.chopWithRegex("(^([^ ]+))(.+)", 1)
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsStartOffNameStr(){
-        object.setStr("Björn O'Malley");
-        object.chopWithRegex("(^[^ ]+)", 0);
-        assertEquals(
-                "O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsEndOffNameStr(){
-        object.setStr("Björn O'Malley");
-        object.chopWithRegex("( (.+)$)", 1);
-        assertEquals(
-            "Björn",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsMiddleFromNameStr(){
-        object.setStr("Björn 'Bill' O'Malley");
-        object.chopWithRegex("( '[^']+' )", 0);
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testFlip() {
-        object.setStr("O'Malley, Björn");
-        object.flip(",");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-}