You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by br...@apache.org on 2015/04/19 17:14:53 UTC
[04/12] [text] Make HumanNameParser return a name object. Introduce a
new wrapper object for strings to be parsed called NameString.
Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/685f9a86
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/685f9a86
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/685f9a86
Branch: refs/heads/SANDBOX-498
Commit: 685f9a864d46cc526b14e3a7476465c49d991478
Parents: 9a0cc85
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:22:45 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:22:45 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 36 ++---
.../org/apache/commons/text/names/Name.java | 141 ++++++-------------
.../apache/commons/text/names/NameString.java | 134 ++++++++++++++++++
.../commons/text/names/HumanNameParserTest.java | 24 ++--
.../commons/text/names/NameStringTest.java | 104 ++++++++++++++
.../org/apache/commons/text/names/NameTest.java | 104 --------------
6 files changed, 315 insertions(+), 228 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index fa2433a..df8e55c 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -195,14 +195,14 @@ public class HumanNameParser {
/**
* Consumes the string and creates the name parts.
*
- * @param nameStr the name to parse. Must not be null.
+ * @param name the name to parse. Must not be null.
* @throws NameParseException if the parser fails to retrieve the name parts.
- * @throws NullPointerException if nameStr is null.
+ * @throws NullPointerException if name is null.
*/
- public void parse(String nameStr) {
- Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+ public Name parse(String name) {
+ Objects.requireNonNull(name, "Parameter 'name' must not be null.");
- Name name = new Name(nameStr);
+ NameString nameString = new NameString(name);
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
@@ -218,28 +218,30 @@ public class HumanNameParser {
String firstRegex = "(?i)^([^ ]+)";
// get nickname, if there is one
- this.nickname = name.chopWithRegex(nicknamesRegex, 2);
+ this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
// get suffix, if there is one
- this.suffix = name.chopWithRegex(suffixRegex, 1);
+ this.suffix = nameString.chopWithRegex(suffixRegex, 1);
- // flip the before-comma and after-comma parts of the name
- name.flip(",");
+ // flip the before-comma and after-comma parts of the nameString
+ nameString.flip(",");
- // get the last name
- this.last = name.chopWithRegex(lastRegex, 0);
+ // get the last name
+ this.last = nameString.chopWithRegex(lastRegex, 0);
// get the first initial, if there is one
- this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
+ this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
- // get the first name
- this.first = name.chopWithRegex(firstRegex, 0);
+ // get the first name
+ this.first = nameString.chopWithRegex(firstRegex, 0);
if (StringUtils.isBlank(this.first)) {
- throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
+ throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
}
- // if anything's left, that's the middle name
- this.middle = name.getStr();
+ // if anything's left, that's the middle name
+ this.middle = nameString.getStr();
+
+ return new Name(leadingInit, first, nickname, middle, last, suffix);
}
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 0dd2560..3067ba5 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -16,119 +16,70 @@
*/
package org.apache.commons.text.names;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import java.util.Objects;
/**
- * <p>A {@code Name} object that encapsulates a name string, and contains the logic
- * for handling with Regexes.</p>
+ * An object holding the individual parts of a parsed human name.
*
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
*/
-public class Name {
+public final class Name {
- /**
- * Encapsulated string. Not immutable!
- */
- private String str;
+ private final String leadingInitial;
+ private final String firstName;
+ private final String nickName;
+ private final String middleName;
+ private final String lastName;
+ private final String suffix;
- /**
- * Creates a new Name object.
- *
- * @param str encapsulated string.
- */
- public Name(String str) {
- this.str = str;
+ Name(String leadingInitial, String firstName, String nickName, String middleName, String lastName, String suffix) {
+ this.leadingInitial = leadingInitial;
+ this.firstName = firstName;
+ this.nickName = nickName;
+ this.middleName = middleName;
+ this.lastName = lastName;
+ this.suffix = suffix;
}
- /**
- * Gets the encapsulated string.
- *
- * @return encapsulated string
- */
- public String getStr() {
- return str;
+ public String getLeadingInitial() {
+ return leadingInitial;
}
- /**
- * Sets the encapsulated string value.
- *
- * @param str string value
- */
- public void setStr(String str) {
- this.str = str;
- this.norm();
+ public String getFirstName() {
+ return firstName;
}
- /**
- * Uses a regex to chop off and return part of the namestring.
- * There are two parts: first, it returns the matched substring,
- * and then it removes that substring from the encapsulated
- * string and normalizes it.
- *
- * @param regex matches the part of the namestring to chop off
- * @param submatchIndex which of the parenthesized submatches to use
- * @return the part of the namestring that got chopped off
- */
- public String chopWithRegex(String regex, int submatchIndex) {
- String chopped = "";
- Pattern pattern = Pattern.compile(regex);
- Matcher matcher = pattern.matcher(this.str);
+ public String getNickName() {
+ return nickName;
+ }
- // workdaround for numReplacements in Java
- int numReplacements = 0;
- while (matcher.find()) {
- numReplacements++;
- }
+ public String getMiddleName() {
+ return middleName;
+ }
- // recreate or the groups are gone
- pattern = Pattern.compile(regex);
- matcher = pattern.matcher(this.str);
- if (matcher.find()) {
- boolean subset = matcher.groupCount() > submatchIndex;
- if (subset) {
- this.str = this.str.replaceAll(regex, " ");
- if (numReplacements > 1) {
- throw new NameParseException("The regex being used to find the name has multiple matches.");
- }
- this.norm();
- return matcher.group(submatchIndex).trim();
- }
- }
- return chopped;
+ public String getLastName() {
+ return lastName;
}
- /**
- * Flips the front and back parts of a name with one another.
- * Front and back are determined by a specified character somewhere in the
- * middle of the string.
- *
- * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
- * @throws NameParseException if a regex fails or a condition is not expected
- */
- public void flip(String flipAroundChar) {
- String[] parts = this.str.split(flipAroundChar);
- if (parts != null) {
- if (parts.length == 2) {
- this.str = String.format("%s %s", parts[1], parts[0]);
- this.norm();
- } else if (parts.length > 2) {
- throw new NameParseException(
- "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
- }
- }
+ public String getSuffix() {
+ return suffix;
}
- /**
- * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
- *
- * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
- * whitespace chars to " ".</p>
- */
- public void norm() {
- this.str = this.str.trim();
- this.str = this.str.replaceAll("\\s+", " ");
- this.str = this.str.replaceAll(",$", " ");
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Name name = (Name) o;
+ return Objects.equals(leadingInitial, name.leadingInitial) &&
+ Objects.equals(firstName, name.firstName) &&
+ Objects.equals(nickName, name.nickName) &&
+ Objects.equals(middleName, name.middleName) &&
+ Objects.equals(lastName, name.lastName) &&
+ Objects.equals(suffix, name.suffix);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(leadingInitial, firstName, nickName, middleName, lastName, suffix);
+ }
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
new file mode 100644
index 0000000..8f606f2
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A wrapper around a String representing a name to parse. Contains the logic
+ * for executing regexes on the wrapped name string.
+ *
+ * <p>This class is not thread-safe.</p>
+ */
+final class NameString {
+
+ /**
+ * Encapsulated string. Not immutable!
+ */
+ private String str;
+
+ /**
+ * Creates a new NameString object.
+ *
+ * @param str encapsulated string.
+ */
+ public NameString(String str) {
+ this.str = str;
+ }
+
+ /**
+ * Gets the encapsulated string.
+ *
+ * @return encapsulated string
+ */
+ public String getStr() {
+ return str;
+ }
+
+ /**
+ * Sets the encapsulated string value.
+ *
+ * @param str string value
+ */
+ public void setStr(String str) {
+ this.str = str;
+ this.norm();
+ }
+
+ /**
+ * Uses a regex to chop off and return part of the namestring.
+ * There are two parts: first, it returns the matched substring,
+ * and then it removes that substring from the encapsulated
+ * string and normalizes it.
+ *
+ * @param regex matches the part of the namestring to chop off
+ * @param submatchIndex which of the parenthesized submatches to use
+ * @return the part of the namestring that got chopped off
+ */
+ public String chopWithRegex(String regex, int submatchIndex) {
+ String chopped = "";
+ Pattern pattern = Pattern.compile(regex);
+ Matcher matcher = pattern.matcher(this.str);
+
+ // workaround: replaceAll does not report a replacement count, so count the matches first
+ int numReplacements = 0;
+ while (matcher.find()) {
+ numReplacements++;
+ }
+
+ // recreate or the groups are gone
+ pattern = Pattern.compile(regex);
+ matcher = pattern.matcher(this.str);
+ if (matcher.find()) {
+ boolean subset = matcher.groupCount() > submatchIndex;
+ if (subset) {
+ this.str = this.str.replaceAll(regex, " ");
+ if (numReplacements > 1) {
+ throw new NameParseException("The regex being used to find the name has multiple matches.");
+ }
+ this.norm();
+ return matcher.group(submatchIndex).trim();
+ }
+ }
+ return chopped;
+ }
+
+ /**
+ * Flips the front and back parts of a name with one another.
+ * Front and back are determined by a specified character somewhere in the
+ * middle of the string.
+ *
+ * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
+ * @throws NameParseException if splitting around {@code flipAroundChar} yields more than two parts
+ */
+ public void flip(String flipAroundChar) {
+ String[] parts = this.str.split(flipAroundChar);
+ if (parts != null) {
+ if (parts.length == 2) {
+ this.str = String.format("%s %s", parts[1], parts[0]);
+ this.norm();
+ } else if (parts.length > 2) {
+ throw new NameParseException(
+ "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+ }
+ }
+ }
+
+ /**
+ * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
+ *
+ * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
+ * whitespace chars to " ".</p>
+ */
+ public void norm() {
+ this.str = this.str.trim();
+ this.str = this.str.replaceAll("\\s+", " ");
+ this.str = this.str.replaceAll(",$", " ");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d43d2be..d059ed4 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,26 +71,26 @@ public class HumanNameParserTest {
*/
private void validateRecord(CSVRecord record) {
HumanNameParser parser = new HumanNameParser();
- parser.parse(record.get(Colums.Name));
+ Name result = parser.parse(record.get(Colums.Name));
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
- parser.getLeadingInit(), equalTo(record.get(Colums.LeadingInit)));
-
+ result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+
assertThat("Wrong FirstName in record " + recordNum,
- parser.getFirst(), equalTo(record.get(Colums.FirstName)));
-
+ result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+
assertThat("Wrong NickName in record " + recordNum,
- parser.getNickname(), equalTo(record.get(Colums.NickName)));
-
+ result.getNickName(), equalTo(record.get(Colums.NickName)));
+
assertThat("Wrong MiddleName in record " + recordNum,
- parser.getMiddle(), equalTo(record.get(Colums.MiddleName)));
-
+ result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+
assertThat("Wrong LastName in record " + recordNum,
- parser.getLast(), equalTo(record.get(Colums.LastName)));
-
+ result.getLastName(), equalTo(record.get(Colums.LastName)));
+
assertThat("Wrong Suffix in record " + recordNum,
- parser.getSuffix(), equalTo(record.get(Colums.Suffix)));
+ result.getSuffix(), equalTo(record.get(Colums.Suffix)));
}
private enum Colums {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
new file mode 100644
index 0000000..494c70b
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code NameString}. Covers whitespace normalization, regex-based
+ * chopping and flipping the string around a separator.
+ */
+public class NameStringTest {
+
+ private NameString nameString;
+
+ @Before
+ public void setUp() {
+ nameString = new NameString("Björn O'Malley");
+ }
+
+ @Test
+ public void testSetStrRemovesWhitespaceAtEnds() {
+ nameString.setStr(" Björn O'Malley \r\n");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testSetStrRemovesRedudentantWhitespace(){
+ nameString.setStr(" Björn O'Malley");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexReturnsChoppedSubstring(){
+ nameString.setStr("Björn O'Malley");
+ assertEquals(
+ "Björn",
+ nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsStartOffNameStr(){
+ nameString.setStr("Björn O'Malley");
+ nameString.chopWithRegex("(^[^ ]+)", 0);
+ assertEquals(
+ "O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsEndOffNameStr(){
+ nameString.setStr("Björn O'Malley");
+ nameString.chopWithRegex("( (.+)$)", 1);
+ assertEquals(
+ "Björn",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsMiddleFromNameStr(){
+ nameString.setStr("Björn 'Bill' O'Malley");
+ nameString.chopWithRegex("( '[^']+' )", 0);
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testFlip() {
+ nameString.setStr("O'Malley, Björn");
+ nameString.flip(",");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
deleted file mode 100644
index 7822e92..0000000
--- a/src/test/java/org/apache/commons/text/names/NameTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.names;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
- * input file as the PHP library 0.2 version.
- */
-public class NameTest {
-
- protected Name object;
-
- @Before
- public void setUp() {
- object = new Name("Björn O'Malley");
- }
-
- @Test
- public void testSetStrRemovesWhitespaceAtEnds() {
- object.setStr(" Björn O'Malley \r\n");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testSetStrRemovesRedudentantWhitespace(){
- object.setStr(" Björn O'Malley");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexReturnsChoppedSubstring(){
- object.setStr("Björn O'Malley");
- assertEquals(
- "Björn",
- object.chopWithRegex("(^([^ ]+))(.+)", 1)
- );
- }
-
- @Test
- public void testChopWithRegexChopsStartOffNameStr(){
- object.setStr("Björn O'Malley");
- object.chopWithRegex("(^[^ ]+)", 0);
- assertEquals(
- "O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexChopsEndOffNameStr(){
- object.setStr("Björn O'Malley");
- object.chopWithRegex("( (.+)$)", 1);
- assertEquals(
- "Björn",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexChopsMiddleFromNameStr(){
- object.setStr("Björn 'Bill' O'Malley");
- object.chopWithRegex("( '[^']+' )", 0);
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testFlip() {
- object.setStr("O'Malley, Björn");
- object.flip(",");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
-}