You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/04/15 11:16:40 UTC
[text] SANDBOX-487 Human name parser
Repository: commons-text
Updated Branches:
refs/heads/master 6280d46c5 -> 411e81f8d
SANDBOX-487 Human name parser
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/411e81f8
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/411e81f8
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/411e81f8
Branch: refs/heads/master
Commit: 411e81f8de92779b29b76a34412a68b6fcfee664
Parents: 6280d46
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Wed Apr 15 21:16:22 2015 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Wed Apr 15 21:16:22 2015 +1200
----------------------------------------------------------------------
pom.xml | 54 ++++
.../commons/text/names/HumanNameParser.java | 269 +++++++++++++++++++
.../org/apache/commons/text/names/Name.java | 136 ++++++++++
.../commons/text/names/NameParseException.java | 79 ++++++
.../apache/commons/text/names/package-info.java | 22 ++
.../org/apache/commons/text/names/NameTest.java | 106 ++++++++
.../apache/commons/text/names/ParserTest.java | 104 +++++++
.../org/apache/commons/text/names/testNames.txt | 31 +++
8 files changed, 801 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 6b2a50f..6cf4dcf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,6 +66,13 @@
<!-- Lang should depend on very little -->
<dependencies>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <!-- if upgrading, be sure to check shaded jar relocations! -->
+ <version>3.4</version>
+ </dependency>
+ <!-- testing -->
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
@@ -139,6 +146,53 @@
</ignorePathsToDelete>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <!-- v1.7 is somehow not compatible with commons-parent 25; see
+ http://svn.apache.org/viewvc?diff_format=h&view=revision&revision=1350822 -->
+ <version>1.6</version>
+ <configuration>
+ <minimizeJar>true</minimizeJar>
+ <createDependencyReducedPom>true</createDependencyReducedPom>
+ <createSourcesJar>true</createSourcesJar>
+ <artifactSet>
+ <includes>
+ <include>org.apache.commons:commons-lang3</include>
+ </includes>
+ </artifactSet>
+ <relocations>
+ <relocation>
+ <pattern>org.apache.commons.lang3.builder.</pattern>
+ <shadedPattern>org.apache.commons.text._lang3.builder.__</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons.lang3.exception.</pattern>
+ <shadedPattern>org.apache.commons.text._lang3.exception.__</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons.lang3.mutable.</pattern>
+ <shadedPattern>org.apache.commons.text._lang3.mutable.__</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons.lang3.tuple.</pattern>
+ <shadedPattern>org.apache.commons.text._lang3.tuple.__</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons.lang3.</pattern>
+ <shadedPattern>org.apache.commons.text._lang3.__</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
new file mode 100644
index 0000000..6ad6394
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -0,0 +1,269 @@
+/*
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * <p>A parser capable of parsing name parts out of a single string.</p>
+ *
+ * <p>The code works by basically applying several Regexes in a certain order
+ * and removing (chopping) tokens off the original string. The parser consumes
+ * the tokens during its creation. It handles names such as:</p>
+ *
+ * <ul>
+ * <li>J. Walter Weatherman </li>
+ * <li>de la Cruz, Ana M.</li>
+ * <li>James C. ('Jimmy') O'Dell, Jr.</li>
+ * </ul>
+ *
+ * <p>and parses out the:</p>
+ *
+ * <ul>
+ * <li>leading initial (Like "J." in "J. Walter Weatherman")</li>
+ * <li>first name (or first initial in a name like 'R. Crumb')</li>
+ * <li>nicknames (like "Jimmy" in "James C. ('Jimmy') O'Dell, Jr.")</li>
+ * <li>middle names</li>
+ * <li>last name (including compound ones like "van der Sar" and "Ortega y Gasset"), and</li>
+ * <li>suffix (like 'Jr.', 'III')</li>
+ * </ul>
+ *
+ * <pre>
+ * Name name = new Name("Sérgio Vieira de Mello");
+ * HumanNameParser parser = new HumanNameParser(name);
+ * String firstName = parser.getFirst();
+ * String nickname = parser.getNickname();
+ * // ...
+ * </pre>
+ *
+ * <p>The original code was written in <a href="http://jasonpriem.com/human-name-parse">PHP</a>
+ * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>.</p>
+ *
+ * <p>This implementation is based on the Java implementation, with additions
+ * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
+ *
+ * <p>This class is not thread-safe.</p>
+ *
+ * @since 1.0
+ */
+public class HumanNameParser {
+
+ /**
+ * Name being parsed; its encapsulated string is consumed by {@link #parse()}.
+ */
+ private Name name;
+ /**
+ * Leading initial (like "J." in "J. Walter Weatherman"); empty if none was found.
+ */
+ private String leadingInit;
+ /**
+ * First name; parsing fails if it turns out blank.
+ */
+ private String first;
+ /**
+ * Single nickname found in the name input; empty if none was found.
+ */
+ private String nickname;
+ /**
+ * Middle name(s): whatever is left of the string once the other parts are chopped off.
+ */
+ private String middle;
+ /**
+ * Last name, including any known prefixes or "... y " parts in front of it.
+ */
+ private String last;
+ /**
+ * Name suffix found in the input (like "Jr." or "III"); empty if none was found.
+ */
+ private String suffix;
+ /**
+ * Known suffixes (lower case, without dots) used to build the suffix regex.
+ */
+ private List<String> suffixes;
+ /**
+ * Known last-name prefixes (lower case) used to build the last-name regex.
+ */
+ private List<String> prefixes;
+
+ /**
+ * Creates a parser given a string name and parses it right away.
+ *
+ * @param name string name
+ */
+ public HumanNameParser(String name) {
+ this(new Name(name));
+ }
+
+ /**
+ * Creates a parser given a {@code Name} object and parses it right away, consuming the name's string.
+ *
+ * @param name {@code Name}
+ */
+ public HumanNameParser(Name name) {
+ this.name = name;
+
+ this.leadingInit = "";
+ this.first = "";
+ this.nickname = "";
+ this.middle = "";
+ this.last = "";
+ this.suffix = "";
+
+ this.suffixes = Arrays.asList(new String[] {
+ "esq", "esquire", "jr",
+ "sr", "2", "ii", "iii", "iv" });
+ this.prefixes = Arrays
+ .asList(new String[] {
+ "bar", "ben", "bin", "da", "dal",
+ "de la", "de", "del", "der", "di", "ibn", "la", "le",
+ "san", "st", "ste", "van", "van der", "van den", "vel",
+ "von" });
+
+ this.parse();
+ }
+
+ /**
+ * Gets the {@code Name} object.
+ *
+ * @return the {@code Name} object; after parsing, its string holds only what was left over (the middle name)
+ */
+ public Name getName() {
+ return name;
+ }
+
+ /**
+ * Gets the leading init part of the name.
+ *
+ * @return the leading init part of the name, or an empty string if there is none
+ */
+ public String getLeadingInit() {
+ return leadingInit;
+ }
+
+ /**
+ * Gets the first name.
+ *
+ * @return first name
+ */
+ public String getFirst() {
+ return first;
+ }
+
+ /**
+ * Gets the nickname.
+ *
+ * @return the nickname, or an empty string if there is none
+ */
+ public String getNickname() {
+ return nickname;
+ }
+
+ /**
+ * Gets the middle name.
+ *
+ * @return the middle name, i.e. the remainder of the string after all other parts were chopped off
+ */
+ public String getMiddle() {
+ return middle;
+ }
+
+ /**
+ * Gets the last name.
+ *
+ * @return the last name
+ */
+ public String getLast() {
+ return last;
+ }
+
+ /**
+ * Gets the suffix part of the name.
+ *
+ * @return the name suffix, or an empty string if there is none
+ */
+ public String getSuffix() {
+ return suffix;
+ }
+
+ /**
+ * Gets the list of known suffixes the parser looks for (not the suffix found; see {@link #getSuffix()}).
+ *
+ * @return the known name suffixes
+ */
+ public List<String> getSuffixes() {
+ return suffixes;
+ }
+
+ /**
+ * Gets the list of known last-name prefixes the parser looks for.
+ *
+ * @return the known name prefixes
+ */
+ public List<String> getPrefixes() {
+ return prefixes;
+ }
+
+ /**
+ * Consumes the string, chopping off nickname, suffix, last name, leading initial and first name, in that order.
+ *
+ * @throws NameParseException if the parser fails to retrieve the name parts
+ */
+ private void parse() {
+ String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*"; // alternation; each suffix may be followed by dots
+ String prefixes = StringUtils.join(this.prefixes, " |") + " "; // alternation; each prefix is followed by a space
+
+ // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
+ // but you can select a particular parenthesized submatch to be returned.
+ // Also, note that each regex requires that the preceding ones have been run, and their matches chopped out.
+ // names that start or end with an apostrophe break this
+ String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\)) ";
+ String suffixRegex = "(?i),* *((" + suffixes + ")$)";
+ String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$";
+ // note the lookahead, which isn't returned or replaced
+ String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))";
+ String firstRegex = "(?i)^([^ ]+)";
+
+ // get nickname, if there is one (group 2 is the text between the quotes/parentheses)
+ this.nickname = this.name.chopWithRegex(nicknamesRegex, 2);
+
+ // get suffix, if there is one
+ this.suffix = this.name.chopWithRegex(suffixRegex, 1);
+
+ // flip the before-comma and after-comma parts of the name
+ this.name.flip(",");
+
+ // get the last name (group 0, i.e. the whole match)
+ this.last = this.name.chopWithRegex(lastRegex, 0);
+
+ // get the first initial, if there is one
+ this.leadingInit = this.name.chopWithRegex(leadingInitRegex, 1);
+
+ // get the first name; it is the only part that must be present
+ this.first = this.name.chopWithRegex(firstRegex, 0);
+ if (StringUtils.isBlank(this.first)) {
+ throw new NameParseException("Couldn't find a first name in '{" + this.name.getStr() + "}'");
+ }
+
+ // if anything's left, that's the middle name
+ this.middle = this.name.getStr();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
new file mode 100644
index 0000000..71df7d8
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>A {@code Name} object that encapsulates a name string, and contains the logic
+ * for handling with Regexes.</p>
+ *
+ * <p>This class is not thread-safe.</p>
+ *
+ * @since 1.0
+ */
+public class Name {
+
+ /**
+ * Encapsulated string. Not immutable: chopping, flipping and normalizing all reassign it.
+ */
+ private String str;
+
+ /**
+ * Creates a new Name object. Unlike {@link #setStr(String)}, the string is stored without being normalized.
+ *
+ * @param str encapsulated string.
+ */
+ public Name(String str) {
+ this.str = str;
+ }
+
+ /**
+ * Gets the encapsulated string.
+ *
+ * @return encapsulated string
+ */
+ public String getStr() {
+ return str;
+ }
+
+ /**
+ * Sets the encapsulated string value and normalizes it with {@link #norm()}.
+ *
+ * @param str string value
+ */
+ public void setStr(String str) {
+ this.str = str;
+ this.norm();
+ }
+
+ /**
+ * Uses a regex to chop off and return part of the namestring.
+ * If the regex matches and has more than {@code submatchIndex} capturing groups, every match
+ * is replaced by a space; more than one match then raises {@code NameParseException}, otherwise
+ * the string is normalized and the requested group of the first match is returned trimmed.
+ *
+ * @param regex matches the part of the namestring to chop off
+ * @param submatchIndex which of the parenthesized submatches to use (0 is the whole match)
+ * @return the part of the namestring that got chopped off, or an empty string if nothing was chopped
+ */
+ public String chopWithRegex(String regex, int submatchIndex) {
+ String chopped = "";
+ Pattern pattern = Pattern.compile(regex);
+ Matcher matcher = pattern.matcher(this.str);
+
+ // workaround for numReplacements in Java: count the matches by hand
+ int numReplacements = 0;
+ while (matcher.find()) {
+ numReplacements++;
+ }
+
+ // recreate the matcher: the counting loop above ran it to the end, so the groups are gone
+ pattern = Pattern.compile(regex);
+ matcher = pattern.matcher(this.str);
+ if (matcher.find()) {
+ boolean subset = matcher.groupCount() > submatchIndex; // NOTE(review): groupCount() excludes group 0, so submatchIndex == groupCount() is a valid group yet is rejected here - confirm intended
+ if (subset) {
+ this.str = this.str.replaceAll(regex, " "); // replaces every match, not only the first one
+ if (numReplacements > 1) { // checked after the replacement, so str is already modified when this throws
+ throw new NameParseException("The regex being used to find the name has multiple matches.");
+ }
+ this.norm();
+ return matcher.group(submatchIndex).trim(); // matcher was built on the old string, so the group is still readable
+ }
+ }
+ return chopped;
+ }
+
+ /**
+ * Flips the front and back parts of a name with one another.
+ * Front and back are determined by a specified character somewhere in the
+ * middle of the string. If the string splits into a single part it is left unchanged.
+ *
+ * @param flipAroundChar the character(s) demarcating the two halves you want to flip; passed to {@code String.split}, so it is treated as a regex
+ * @throws NameParseException if the string splits into more than two parts
+ */
+ public void flip(String flipAroundChar) {
+ String[] parts = this.str.split(flipAroundChar);
+ if (parts != null) { // String.split never returns null, so this is always true
+ if (parts.length == 2) {
+ this.str = String.format("%s %s", parts[1], parts[0]);
+ this.norm();
+ } else if (parts.length > 2) {
+ throw new NameParseException(
+ "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+ }
+ }
+ }
+
+ /**
+ * <p>Removes extra whitespace and a trailing comma from {@code this.str}.</p>
+ *
+ * <p>Strips whitespace chars from ends, collapses each run of whitespace chars into a single " ",
+ * and finally replaces a trailing comma with " " (that space is not trimmed again).</p>
+ */
+ public void norm() {
+ this.str = this.str.trim();
+ this.str = this.str.replaceAll("\\s+", " ");
+ this.str = this.str.replaceAll(",$", " ");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/NameParseException.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameParseException.java b/src/main/java/org/apache/commons/text/names/NameParseException.java
new file mode 100644
index 0000000..2ff160f
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameParseException.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+/**
+ * Name parse exception.
+ *
+ * @since 1.0
+ */
+public class NameParseException extends RuntimeException {
+
+ /**
+ * Serial UID.
+ */
+ private static final long serialVersionUID = -2375904385006224156L;
+
+ /**
+ * Constructor without message or cause.
+ */
+ public NameParseException() {
+ super();
+ }
+
+ /**
+ * Constructor with message.
+ *
+ * @param message message
+ */
+ public NameParseException(String message) {
+ super(message);
+ }
+
+ /**
+ * Constructor with cause.
+ *
+ * @param cause cause
+ */
+ public NameParseException(Throwable cause) {
+ super(cause);
+ }
+
+ /**
+ * Constructor with message and cause.
+ *
+ * @param message message
+ * @param cause cause
+ */
+ public NameParseException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * Complete constructor; all arguments are handed to the {@code RuntimeException} constructor unchanged.
+ *
+ * @param message message
+ * @param cause cause
+ * @param enableSuppression flag to enable suppression
+ * @param writableStackTrace flag to make the stack trace writable
+ */
+ public NameParseException(String message, Throwable cause,
+ boolean enableSuppression, boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/package-info.java b/src/main/java/org/apache/commons/text/names/package-info.java
new file mode 100644
index 0000000..1423d24
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * <p>A human names parser in Java.</p>
+ *
+ * @since 1.0
+ */
+package org.apache.commons.text.names;
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
new file mode 100644
index 0000000..53c9764
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
+ * input file as the PHP library 0.2 version.
+ *
+ * @since 1.0
+ */
+public class NameTest {
+
+ protected Name object; // fresh instance per test, see setUp()
+
+ @Before
+ public void setUp() {
+ object = new Name("Björn O'Malley");
+ }
+
+ @Test
+ public void testSetStrRemovesWhitespaceAtEnds() {
+ object.setStr(" Björn O'Malley \r\n"); // setStr() normalizes, so the whitespace at both ends is dropped
+ assertEquals(
+ "Björn O'Malley",
+ object.getStr()
+ );
+ }
+
+ @Test
+ public void testSetStrRemovesRedudentantWhitespace(){
+ object.setStr(" Björn O'Malley");
+ assertEquals(
+ "Björn O'Malley",
+ object.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexReturnsChoppedSubstring(){
+ object.setStr("Björn O'Malley");
+ assertEquals(
+ "Björn",
+ object.chopWithRegex("(^([^ ]+))(.+)", 1) // group 1: the leading run of non-space characters
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsStartOffNameStr(){
+ object.setStr("Björn O'Malley");
+ object.chopWithRegex("(^[^ ]+)", 0); // group 0 is the whole match
+ assertEquals(
+ "O'Malley",
+ object.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsEndOffNameStr(){
+ object.setStr("Björn O'Malley");
+ object.chopWithRegex("( (.+)$)", 1); // the match (space plus the rest) is replaced by " " and then normalized away
+ assertEquals(
+ "Björn",
+ object.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsMiddleFromNameStr(){
+ object.setStr("Björn 'Bill' O'Malley");
+ object.chopWithRegex("( '[^']+' )", 0); // the quoted nickname and its surrounding spaces collapse into one space
+ assertEquals(
+ "Björn O'Malley",
+ object.getStr()
+ );
+ }
+
+ @Test
+ public void testFlip() {
+ object.setStr("O'Malley, Björn");
+ object.flip(","); // swaps the parts before and after the comma
+ assertEquals(
+ "Björn O'Malley",
+ object.getStr()
+ );
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/java/org/apache/commons/text/names/ParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/ParserTest.java b/src/test/java/org/apache/commons/text/names/ParserTest.java
new file mode 100644
index 0000000..e9ca3c0
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/ParserTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+import org.apache.commons.lang3.StringUtils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests the {@code HumanNameParser} class.
+ *
+ * @since 1.0
+ */
+public class ParserTest {
+
+ private static final Logger LOGGER = Logger.getLogger(ParserTest.class.getName());
+
+ private static File testNames = null; // fixture file, resolved from the classpath in setUp()
+
+ @BeforeClass
+ public static void setUp() {
+ testNames = new File(ParserTest.class.getResource("/org/apache/commons/text/names/testNames.txt").getFile());
+ }
+
+ @Test
+ public void testAll() throws IOException {
+ BufferedReader buffer = null;
+ FileReader reader = null;
+
+ try {
+ reader = new FileReader(testNames); // NOTE(review): FileReader(File) decodes with the platform default charset, while the fixture holds non-ASCII names - confirm the build pins the encoding
+ buffer = new BufferedReader(reader);
+
+ String line = null;
+ while ((line = buffer.readLine()) != null) {
+ if (StringUtils.isBlank(line)) {
+ LOGGER.warning("Empty line in testNames.txt");
+ continue;
+ }
+
+ String[] tokens = line.split("\\|"); // NOTE(review): split drops trailing empty tokens, so a line ending right at the last '|' yields 6 tokens and is skipped below - confirm
+ if (tokens.length != 7) { // name + leading init, first, nickname, middle, last, suffix
+ LOGGER.warning(String.format("Invalid line in testNames.txt: %s", line));
+ continue;
+ }
+
+ validateLine(tokens);
+ }
+ } finally {
+ if (reader != null)
+ reader.close();
+ if (buffer != null)
+ buffer.close();
+ }
+ }
+
+ /**
+ * Validates a line in the testNames.txt file: parses tokens[0] and compares each parsed part with tokens[1] to tokens[6].
+ *
+ * @param tokens the tokens with leading spaces
+ */
+ private void validateLine(String[] tokens) {
+ String name = tokens[0].trim();
+
+ String leadingInit = tokens[1].trim();
+ String first = tokens[2].trim();
+ String nickname = tokens[3].trim();
+ String middle = tokens[4].trim();
+ String last = tokens[5].trim();
+ String suffix = tokens[6].trim();
+
+ HumanNameParser parser = new HumanNameParser(name);
+
+ assertEquals(leadingInit, parser.getLeadingInit());
+ assertEquals(first, parser.getFirst());
+ assertEquals(nickname, parser.getNickname());
+ assertEquals(middle, parser.getMiddle());
+ assertEquals(last, parser.getLast());
+ assertEquals(suffix, parser.getSuffix());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/resources/org/apache/commons/text/names/testNames.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/org/apache/commons/text/names/testNames.txt b/src/test/resources/org/apache/commons/text/names/testNames.txt
new file mode 100644
index 0000000..83ddc31
--- /dev/null
+++ b/src/test/resources/org/apache/commons/text/names/testNames.txt
@@ -0,0 +1,31 @@
+Björn O'Malley| | Björn| | | O'Malley|
+Bin Lin| | Bin| | | Lin|
+Linda Jones| | Linda| | | Jones|
+Jason H. Priem| | Jason| | H.| Priem|
+Björn O'Malley-Muñoz| | Björn| | | O'Malley-Muñoz|
+Björn C. O'Malley| | Björn| | C.| O'Malley|
+Björn "Bill" O'Malley| | Björn| Bill| | O'Malley|
+Björn ("Bill") O'Malley| | Björn| Bill| | O'Malley|
+Björn ("Wild Bill") O'Malley| | Björn| Wild Bill| | O'Malley|
+Björn (Bill) O'Malley| | Björn| Bill| | O'Malley|
+Björn 'Bill' O'Malley| | Björn| Bill| | O'Malley|
+Björn C O'Malley| | Björn| | C| O'Malley|
+Björn C. R. O'Malley| | Björn| | C. R.| O'Malley|
+Björn Charles O'Malley| | Björn| | Charles| O'Malley|
+Björn Charles R. O'Malley| | Björn| | Charles R.| O'Malley|
+Björn van O'Malley| | Björn| | | van O'Malley|
+Björn Charles van der O'Malley| | Björn| | Charles| van der O'Malley|
+Björn Charles O'Malley y Muñoz| | Björn| | Charles| O'Malley y Muñoz|
+Björn O'Malley, Jr.| | Björn| | | O'Malley| Jr.
+Björn O'Malley Jr| | Björn| | | O'Malley| Jr
+B O'Malley| | B| | | O'Malley|
+William Carlos Williams| | William| | Carlos| Williams|
+C. Björn Roger O'Malley| C.| Björn| | Roger| O'Malley|
+B. C. O'Malley| | B.| | C.| O'Malley|
+B C O'Malley| | B| | C| O'Malley|
+B.J. Thomas| | B.J.| | | Thomas|
+O'Malley, Björn| | Björn| | | O'Malley|
+O'Malley, Björn Jr| | Björn| | | O'Malley| Jr
+O'Malley, C. Björn| C.| Björn| | | O'Malley|
+O'Malley, C. Björn III| C.| Björn| | | O'Malley| III
+O'Malley y Muñoz, C. Björn Roger III| C.| Björn| | Roger| O'Malley y Muñoz| III
\ No newline at end of file