You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/04/20 04:58:58 UTC
[01/13] [text] Make parse method public
Repository: commons-text
Updated Branches:
refs/heads/master e8e85d9de -> bf8bfb0a4
Make parse method public
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/aa293500
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/aa293500
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/aa293500
Branch: refs/heads/master
Commit: aa293500080d6872b3ac653dcf74a50cf8223ae5
Parents: e8e85d9
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 15:58:16 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 15:58:16 2015 +0200
----------------------------------------------------------------------
src/main/java/org/apache/commons/text/names/HumanNameParser.java | 4 +---
.../java/org/apache/commons/text/names/HumanNameParserTest.java | 1 +
2 files changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/aa293500/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index 843685a..5088bba 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -134,8 +134,6 @@ public class HumanNameParser {
"de la", "de", "del", "der", "di", "ibn", "la", "le",
"san", "st", "ste", "van", "van der", "van den", "vel",
"von" });
-
- this.parse();
}
/**
@@ -224,7 +222,7 @@ public class HumanNameParser {
*
* @throws NameParseException if the parser fails to retrieve the name parts
*/
- private void parse() {
+ public void parse() {
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
http://git-wip-us.apache.org/repos/asf/commons-text/blob/aa293500/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 90e1dfa..5ff7805 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -65,6 +65,7 @@ public class HumanNameParserTest {
*/
private void validateRecord(CSVRecord record) {
HumanNameParser parser = new HumanNameParser(record.get(Colums.Name));
+ parser.parse();
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
[02/13] [text] Pass the name to parse as parameter to the parse method
Posted by ki...@apache.org.
Pass the name to parse as parameter to the parse method
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/df7e7a7b
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/df7e7a7b
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/df7e7a7b
Branch: refs/heads/master
Commit: df7e7a7b0aba73a1bf09c41dbd32e913252a8707
Parents: aa29350
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:02:55 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:02:55 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 52 ++++++--------------
.../commons/text/names/HumanNameParserTest.java | 4 +-
2 files changed, 16 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/df7e7a7b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index 5088bba..bf8f9ed 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -65,10 +65,6 @@ import org.apache.commons.lang3.StringUtils;
public class HumanNameParser {
/**
- * Name parsed.
- */
- private Name name;
- /**
* Leading init part.
*/
private String leadingInit;
@@ -103,21 +99,8 @@ public class HumanNameParser {
/**
* Creates a parser given a string name.
- *
- * @param name string name
- */
- public HumanNameParser(String name) {
- this(new Name(name));
- }
-
- /**
- * Creates a parser given a {@code Name} object.
- *
- * @param name {@code Name}
*/
- public HumanNameParser(Name name) {
- this.name = name;
-
+ public HumanNameParser() {
this.leadingInit = "";
this.first = "";
this.nickname = "";
@@ -125,9 +108,9 @@ public class HumanNameParser {
this.last = "";
this.suffix = "";
- this.suffixes = Arrays.asList(new String[] {
+ this.suffixes = Arrays.asList(new String[]{
"esq", "esquire", "jr",
- "sr", "2", "ii", "iii", "iv" });
+ "sr", "2", "ii", "iii", "iv"});
this.prefixes = Arrays
.asList(new String[] {
"bar", "ben", "bin", "da", "dal",
@@ -137,15 +120,6 @@ public class HumanNameParser {
}
/**
- * Gets the {@code Name} object.
- *
- * @return the {@code Name} object
- */
- public Name getName() {
- return name;
- }
-
- /**
* Gets the leading init part of the name.
*
* @return the leading init part of the name
@@ -220,9 +194,11 @@ public class HumanNameParser {
/**
* Consumes the string and creates the name parts.
*
+ * @param nameStr the name to parse.
* @throws NameParseException if the parser fails to retrieve the name parts
*/
- public void parse() {
+ public void parse(String nameStr) {
+ Name name = new Name(nameStr);
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
@@ -238,28 +214,28 @@ public class HumanNameParser {
String firstRegex = "(?i)^([^ ]+)";
// get nickname, if there is one
- this.nickname = this.name.chopWithRegex(nicknamesRegex, 2);
+ this.nickname = name.chopWithRegex(nicknamesRegex, 2);
// get suffix, if there is one
- this.suffix = this.name.chopWithRegex(suffixRegex, 1);
+ this.suffix = name.chopWithRegex(suffixRegex, 1);
// flip the before-comma and after-comma parts of the name
- this.name.flip(",");
+ name.flip(",");
// get the last name
- this.last = this.name.chopWithRegex(lastRegex, 0);
+ this.last = name.chopWithRegex(lastRegex, 0);
// get the first initial, if there is one
- this.leadingInit = this.name.chopWithRegex(leadingInitRegex, 1);
+ this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
// get the first name
- this.first = this.name.chopWithRegex(firstRegex, 0);
+ this.first = name.chopWithRegex(firstRegex, 0);
if (StringUtils.isBlank(this.first)) {
- throw new NameParseException("Couldn't find a first name in '{" + this.name.getStr() + "}'");
+ throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
}
// if anything's left, that's the middle name
- this.middle = this.name.getStr();
+ this.middle = name.getStr();
}
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/df7e7a7b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 5ff7805..478d19c 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -64,8 +64,8 @@ public class HumanNameParserTest {
* @param record a CSVRecord representing one record in the input file.
*/
private void validateRecord(CSVRecord record) {
- HumanNameParser parser = new HumanNameParser(record.get(Colums.Name));
- parser.parse();
+ HumanNameParser parser = new HumanNameParser();
+ parser.parse(record.get(Colums.Name));
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
[11/13] [text] Better JavaDoc for HumanNameParser
Posted by ki...@apache.org.
Better JavaDoc for HumanNameParser
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6d047a46
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6d047a46
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6d047a46
Branch: refs/heads/master
Commit: 6d047a461f83017c8b723f4b28c0ad10f3f1dc36
Parents: b1c7e56
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 17:13:11 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 17:13:11 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 99 +++++++++++++-------
1 file changed, 64 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d047a46/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index b5c0aa3..5407d15 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -24,58 +24,87 @@ import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
/**
- * <p>A parser capable of parsing name parts out of a single string.</p>
+ * A parser capable of parsing name parts out of a single string.
*
+ * <h3>Parsing examples</h3>
+ *
* <p>The code works by basically applying several Regexes in a certain order
- * and removing (chopping) tokens off the original string. The parser consumes
- * the tokens during its creation.</p>
+ * and removing (chopping) tokens off the original string. The parser creates
+ * a {@link Name} object representing the parse result. Note that passing null
+ * to the {@link #parse(String)} method will result in an exception.</p>
*
- * <ul>
- * <li>J. Walter Weatherman </li>
- * <li>de la Cruz, Ana M.</li>
- * <li>James C. ('Jimmy') O'Dell, Jr.</li>
- * </ul>
- *
- * <p>and parses out the:</p>
- *
- * <ul>
- * <li>leading initial (Like "J." in "J. Walter Weatherman")</li>
- * <li>first name (or first initial in a name like 'R. Crumb')</li>
- * <li>nicknames (like "Jimmy" in "James C. ('Jimmy') O'Dell, Jr.")</li>
- * <li>middle names</li>
- * <li>last name (including compound ones like "van der Sar' and "Ortega y Gasset"), and</li>
- * <li>suffix (like 'Jr.', 'III')</li>
- * </ul>
+ * <table>
+ * <tr>
+ * <th>input</th>
+ * <th>Leading initial</th>
+ * <th>First name</th>
+ * <th>Nick name</th>
+ * <th>Middle name</th>
+ * <th>Last Name</th>
+ * <th>Suffix</th>
+ * </tr>
+ * <tr>
+ * <td>J. Walter Weatherman</td>
+ * <td>J.</td>
+ * <td>Walter</td>
+ * <td></td>
+ * <td></td>
+ * <td>Weatherman</td>
+ * <td></td>
+ * </tr>
+ * <tr>
+ * <td>de la Cruz, Ana M.</td>
+ * <td></td>
+ * <td>Ana</td>
+ * <td></td>
+ * <td>M.</td>
+ * <td>de la Cruz</td>
+ * <td></td>
+ * </tr>
+ * <tr>
+ * <td>James C. ('Jimmy') O'Dell, Jr.</td>
+ * <td></td>
+ * <td>James</td>
+ * <td>Jimmy</td>
+ * <td>C.</td>
+ * <td>O'Dell</td>
+ * <td>Jr.</td>
+ * </tr>
+ * </table>
*
+ * <h3>Sample usage</h3>
+ *
+ * <p>HumanNameParser instances are immutable and can be reused for parsing multiple names:</p>
+ *
* <pre>
- * Name name = new Name("Sérgio Vieira de Mello");
- * HumanNameParser parser = new HumanNameParser(name);
- * String firstName = parser.getFirst();
- * String nickname = parser.getNickname();
+ * HumanNameParser parser = new HumanNameParser();
+ * Name parsedName = parser.parse("Sérgio Vieira de Mello")
+ * String firstName = parsedName.getFirstName();
+ * String nickname = parsedName.getNickName();
* // ...
+ *
+ * Name nextName = parser.parse("James C. ('Jimmy') O'Dell, Jr.")
+ * String firstName = nextName.getFirstName();
+ * String nickname = nextName.getNickName();
* </pre>
*
+ * <h3>Further notes</h3>
+ *
* <p>The original code was written in <a href="http://jasonpriem.com/human-name-parse">PHP</a>
- * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>.</p>
- *
- * <p>This implementation is based on the Java implementation, with additions
- * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
+ * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>. This
+ * implementation is based on the Java implementation, with additions
+ * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>
+ * and <a href="https://issues.apache.org/jira/browse/SANDBOX-498">SANDBOX-498</a>.</p>
*
* <p>This class is immutable.</p>
*/
public final class HumanNameParser {
- /**
- * Suffixes found.
- */
private final List<String> suffixes;
- /**
- * Prefixes found.
- */
private final List<String> prefixes;
/**
- * Creates a parser given a string name.
+ * Creates a new parser.
*/
public HumanNameParser() {
// TODO make this configurable
@@ -90,7 +119,7 @@ public final class HumanNameParser {
}
/**
- * Consumes the string and creates the name parts.
+ * Parses a name from the given string.
*
* @param name the name to parse. Must not be null.
* @throws NameParseException if the parser fails to retrieve the name parts.
[06/13] [text] Use a shared parser instance for tests
Posted by ki...@apache.org.
Use a shared parser instance for tests
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/bbba0a32
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/bbba0a32
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/bbba0a32
Branch: refs/heads/master
Commit: bbba0a327b7ad8873d176254ec2a550757911bda
Parents: 1f6c5da
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:30:00 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:30:00 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParserTest.java | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/bbba0a32/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d059ed4..314a949 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -33,32 +33,33 @@ import org.junit.Test;
*/
public class HumanNameParserTest {
- private CSVParser parser;
+ private CSVParser inputParser;
+ private HumanNameParser nameParser;
@Before
public void setUp() throws Exception {
- parser = CSVParser.parse(
+ inputParser = CSVParser.parse(
HumanNameParserTest.class.getResource("testNames.txt"),
Charset.forName("UTF-8"),
CSVFormat.DEFAULT.withDelimiter('|').withHeader());
+ nameParser = new HumanNameParser();
}
@After
public void tearDown() throws Exception {
- if (parser != null) {
- parser.close();
+ if (inputParser != null) {
+ inputParser.close();
}
}
@Test(expected = NullPointerException.class)
public void shouldThrowNullPointerException_WhenNullIsParsed() throws Exception {
- HumanNameParser parser = new HumanNameParser();
- parser.parse(null);
+ nameParser.parse(null);
}
@Test
public void testInputs() {
- for (CSVRecord record : parser) {
+ for (CSVRecord record : inputParser) {
validateRecord(record);
}
}
@@ -70,8 +71,7 @@ public class HumanNameParserTest {
* @param record a CSVRecord representing one record in the input file.
*/
private void validateRecord(CSVRecord record) {
- HumanNameParser parser = new HumanNameParser();
- Name result = parser.parse(record.get(Colums.Name));
+ Name result = nameParser.parse(record.get(Colums.Name));
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
[03/13] [text] Check for null inputs
Posted by ki...@apache.org.
Check for null inputs
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/9a0cc85a
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/9a0cc85a
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/9a0cc85a
Branch: refs/heads/master
Commit: 9a0cc85ad01dcf1f468736984cdd5dec0a7a3bf3
Parents: df7e7a7
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:06:09 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:06:09 2015 +0200
----------------------------------------------------------------------
.../java/org/apache/commons/text/names/HumanNameParser.java | 8 ++++++--
.../org/apache/commons/text/names/HumanNameParserTest.java | 6 ++++++
2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/9a0cc85a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index bf8f9ed..fa2433a 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -19,6 +19,7 @@ package org.apache.commons.text.names;
import java.util.Arrays;
import java.util.List;
+import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
@@ -194,10 +195,13 @@ public class HumanNameParser {
/**
* Consumes the string and creates the name parts.
*
- * @param nameStr the name to parse.
- * @throws NameParseException if the parser fails to retrieve the name parts
+ * @param nameStr the name to parse. Must not be null.
+ * @throws NameParseException if the parser fails to retrieve the name parts.
+ * @throws NullPointerException if nameStr is null.
*/
public void parse(String nameStr) {
+ Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+
Name name = new Name(nameStr);
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
http://git-wip-us.apache.org/repos/asf/commons-text/blob/9a0cc85a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 478d19c..d43d2be 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -50,6 +50,12 @@ public class HumanNameParserTest {
}
}
+ @Test(expected = NullPointerException.class)
+ public void shouldThrowNullPointerException_WhenNullIsParsed() throws Exception {
+ HumanNameParser parser = new HumanNameParser();
+ parser.parse(null);
+ }
+
@Test
public void testInputs() {
for (CSVRecord record : parser) {
[10/13] [text] Condition will always be true
Posted by ki...@apache.org.
Condition will always be true
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/b1c7e564
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/b1c7e564
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/b1c7e564
Branch: refs/heads/master
Commit: b1c7e564251e7a404aa3d021c282349150fd4061
Parents: ed985cd
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:45:49 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:45:49 2015 +0200
----------------------------------------------------------------------
.../org/apache/commons/text/names/NameString.java | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/b1c7e564/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
index 54e2753..21898d3 100644
--- a/src/main/java/org/apache/commons/text/names/NameString.java
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -98,14 +98,12 @@ final class NameString {
*/
void flip(String flipAroundChar) {
String[] parts = this.str.split(flipAroundChar);
- if (parts != null) {
- if (parts.length == 2) {
- this.str = String.format("%s %s", parts[1], parts[0]);
- this.norm();
- } else if (parts.length > 2) {
- throw new NameParseException(
- "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
- }
+ if (parts.length == 2) {
+ this.str = String.format("%s %s", parts[1], parts[0]);
+ this.norm();
+ } else if (parts.length > 2) {
+ throw new NameParseException(
+ "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
}
}
[04/13] [text] Make HumanNameParser return a name object. Introduce a
new wrapper object for strings to be parsed called NameString.
Posted by ki...@apache.org.
Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/685f9a86
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/685f9a86
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/685f9a86
Branch: refs/heads/master
Commit: 685f9a864d46cc526b14e3a7476465c49d991478
Parents: 9a0cc85
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:22:45 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:22:45 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 36 ++---
.../org/apache/commons/text/names/Name.java | 141 ++++++-------------
.../apache/commons/text/names/NameString.java | 134 ++++++++++++++++++
.../commons/text/names/HumanNameParserTest.java | 24 ++--
.../commons/text/names/NameStringTest.java | 104 ++++++++++++++
.../org/apache/commons/text/names/NameTest.java | 104 --------------
6 files changed, 315 insertions(+), 228 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index fa2433a..df8e55c 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -195,14 +195,14 @@ public class HumanNameParser {
/**
* Consumes the string and creates the name parts.
*
- * @param nameStr the name to parse. Must not be null.
+ * @param name the name to parse. Must not be null.
* @throws NameParseException if the parser fails to retrieve the name parts.
- * @throws NullPointerException if nameStr is null.
+ * @throws NullPointerException if name is null.
*/
- public void parse(String nameStr) {
- Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+ public Name parse(String name) {
+ Objects.requireNonNull(name, "Parameter 'name' must not be null.");
- Name name = new Name(nameStr);
+ NameString nameString = new NameString(name);
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
@@ -218,28 +218,30 @@ public class HumanNameParser {
String firstRegex = "(?i)^([^ ]+)";
// get nickname, if there is one
- this.nickname = name.chopWithRegex(nicknamesRegex, 2);
+ this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
// get suffix, if there is one
- this.suffix = name.chopWithRegex(suffixRegex, 1);
+ this.suffix = nameString.chopWithRegex(suffixRegex, 1);
- // flip the before-comma and after-comma parts of the name
- name.flip(",");
+ // flip the before-comma and after-comma parts of the nameString
+ nameString.flip(",");
- // get the last name
- this.last = name.chopWithRegex(lastRegex, 0);
+ // get the last nameString
+ this.last = nameString.chopWithRegex(lastRegex, 0);
// get the first initial, if there is one
- this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
+ this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
- // get the first name
- this.first = name.chopWithRegex(firstRegex, 0);
+ // get the first nameString
+ this.first = nameString.chopWithRegex(firstRegex, 0);
if (StringUtils.isBlank(this.first)) {
- throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
+ throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
}
- // if anything's left, that's the middle name
- this.middle = name.getStr();
+ // if anything's left, that's the middle nameString
+ this.middle = nameString.getStr();
+
+ return new Name(leadingInit, first, nickname, middle, last, suffix);
}
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 0dd2560..3067ba5 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -16,119 +16,70 @@
*/
package org.apache.commons.text.names;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import java.util.Objects;
/**
- * <p>A {@code Name} object that encapsulates a name string, and contains the logic
- * for handling with Regexes.</p>
+ * An object representing the result of parsing a Name.
*
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
*/
-public class Name {
+public final class Name {
- /**
- * Encapsulated string. Not immutable!
- */
- private String str;
+ private final String leadingInitial;
+ private final String firstName;
+ private final String nickName;
+ private final String middleName;
+ private final String lastName;
+ private final String suffix;
- /**
- * Creates a new Name object.
- *
- * @param str encapsulated string.
- */
- public Name(String str) {
- this.str = str;
+ Name(String leadingInitial, String firstName, String nickName, String middleName, String lastName, String suffix) {
+ this.leadingInitial = leadingInitial;
+ this.firstName = firstName;
+ this.nickName = nickName;
+ this.middleName = middleName;
+ this.lastName = lastName;
+ this.suffix = suffix;
}
- /**
- * Gets the encapsulated string.
- *
- * @return encapsulated string
- */
- public String getStr() {
- return str;
+ public String getLeadingInitial() {
+ return leadingInitial;
}
- /**
- * Sets the encapsulated string value.
- *
- * @param str string value
- */
- public void setStr(String str) {
- this.str = str;
- this.norm();
+ public String getFirstName() {
+ return firstName;
}
- /**
- * Uses a regex to chop off and return part of the namestring.
- * There are two parts: first, it returns the matched substring,
- * and then it removes that substring from the encapsulated
- * string and normalizes it.
- *
- * @param regex matches the part of the namestring to chop off
- * @param submatchIndex which of the parenthesized submatches to use
- * @return the part of the namestring that got chopped off
- */
- public String chopWithRegex(String regex, int submatchIndex) {
- String chopped = "";
- Pattern pattern = Pattern.compile(regex);
- Matcher matcher = pattern.matcher(this.str);
+ public String getNickName() {
+ return nickName;
+ }
- // workdaround for numReplacements in Java
- int numReplacements = 0;
- while (matcher.find()) {
- numReplacements++;
- }
+ public String getMiddleName() {
+ return middleName;
+ }
- // recreate or the groups are gone
- pattern = Pattern.compile(regex);
- matcher = pattern.matcher(this.str);
- if (matcher.find()) {
- boolean subset = matcher.groupCount() > submatchIndex;
- if (subset) {
- this.str = this.str.replaceAll(regex, " ");
- if (numReplacements > 1) {
- throw new NameParseException("The regex being used to find the name has multiple matches.");
- }
- this.norm();
- return matcher.group(submatchIndex).trim();
- }
- }
- return chopped;
+ public String getLastName() {
+ return lastName;
}
- /**
- * Flips the front and back parts of a name with one another.
- * Front and back are determined by a specified character somewhere in the
- * middle of the string.
- *
- * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
- * @throws NameParseException if a regex fails or a condition is not expected
- */
- public void flip(String flipAroundChar) {
- String[] parts = this.str.split(flipAroundChar);
- if (parts != null) {
- if (parts.length == 2) {
- this.str = String.format("%s %s", parts[1], parts[0]);
- this.norm();
- } else if (parts.length > 2) {
- throw new NameParseException(
- "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
- }
- }
+ public String getSuffix() {
+ return suffix;
}
- /**
- * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
- *
- * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
- * whitespace chars to " ".</p>
- */
- public void norm() {
- this.str = this.str.trim();
- this.str = this.str.replaceAll("\\s+", " ");
- this.str = this.str.replaceAll(",$", " ");
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Name name = (Name) o;
+ return Objects.equals(leadingInitial, name.leadingInitial) &&
+ Objects.equals(firstName, name.firstName) &&
+ Objects.equals(nickName, name.nickName) &&
+ Objects.equals(middleName, name.middleName) &&
+ Objects.equals(lastName, name.lastName) &&
+ Objects.equals(suffix, name.suffix);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(leadingInitial, firstName, nickName, middleName, lastName, suffix);
+ }
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
new file mode 100644
index 0000000..8f606f2
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A wrapper around a String representing a Name to parse. Contains the logic
+ * for handling executing Regexes on the wrapped name string.
+ *
+ * <p>This class is not thread-safe.</p>
+ */
+final class NameString {
+
+ /**
+ * Encapsulated string. Not immutable!
+ */
+ private String str;
+
+ /**
+ * Creates a new NameString object.
+ *
+ * @param str encapsulated string.
+ */
+ public NameString(String str) {
+ this.str = str;
+ }
+
+ /**
+ * Gets the encapsulated string.
+ *
+ * @return encapsulated string
+ */
+ public String getStr() {
+ return str;
+ }
+
+ /**
+ * Sets the encapsulated string value.
+ *
+ * @param str string value
+ */
+ public void setStr(String str) {
+ this.str = str;
+ this.norm();
+ }
+
+ /**
+ * Uses a regex to chop off and return part of the namestring.
+ * There are two parts: first, it returns the matched substring,
+ * and then it removes that substring from the encapsulated
+ * string and normalizes it.
+ *
+ * @param regex matches the part of the namestring to chop off
+ * @param submatchIndex which of the parenthesized submatches to use
+ * @return the part of the namestring that got chopped off
+ */
+ public String chopWithRegex(String regex, int submatchIndex) {
+ String chopped = "";
+ Pattern pattern = Pattern.compile(regex);
+ Matcher matcher = pattern.matcher(this.str);
+
+ // workaround for numReplacements in Java
+ int numReplacements = 0;
+ while (matcher.find()) {
+ numReplacements++;
+ }
+
+ // recreate or the groups are gone
+ pattern = Pattern.compile(regex);
+ matcher = pattern.matcher(this.str);
+ if (matcher.find()) {
+ boolean subset = matcher.groupCount() > submatchIndex;
+ if (subset) {
+ this.str = this.str.replaceAll(regex, " ");
+ if (numReplacements > 1) {
+ throw new NameParseException("The regex being used to find the name has multiple matches.");
+ }
+ this.norm();
+ return matcher.group(submatchIndex).trim();
+ }
+ }
+ return chopped;
+ }
+
+ /**
+ * Flips the front and back parts of a name with one another.
+ * Front and back are determined by a specified character somewhere in the
+ * middle of the string.
+ *
+ * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
+ * @throws NameParseException if a regex fails or a condition is not expected
+ */
+ public void flip(String flipAroundChar) {
+ String[] parts = this.str.split(flipAroundChar);
+ if (parts != null) {
+ if (parts.length == 2) {
+ this.str = String.format("%s %s", parts[1], parts[0]);
+ this.norm();
+ } else if (parts.length > 2) {
+ throw new NameParseException(
+ "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+ }
+ }
+ }
+
+ /**
+ * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
+ *
+ * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
+ * whitespace chars to " ".</p>
+ */
+ public void norm() {
+ this.str = this.str.trim();
+ this.str = this.str.replaceAll("\\s+", " ");
+ this.str = this.str.replaceAll(",$", " ");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d43d2be..d059ed4 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,26 +71,26 @@ public class HumanNameParserTest {
*/
private void validateRecord(CSVRecord record) {
HumanNameParser parser = new HumanNameParser();
- parser.parse(record.get(Colums.Name));
+ Name result = parser.parse(record.get(Colums.Name));
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
- parser.getLeadingInit(), equalTo(record.get(Colums.LeadingInit)));
-
+ result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+
assertThat("Wrong FirstName in record " + recordNum,
- parser.getFirst(), equalTo(record.get(Colums.FirstName)));
-
+ result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+
assertThat("Wrong NickName in record " + recordNum,
- parser.getNickname(), equalTo(record.get(Colums.NickName)));
-
+ result.getNickName(), equalTo(record.get(Colums.NickName)));
+
assertThat("Wrong MiddleName in record " + recordNum,
- parser.getMiddle(), equalTo(record.get(Colums.MiddleName)));
-
+ result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+
assertThat("Wrong LastName in record " + recordNum,
- parser.getLast(), equalTo(record.get(Colums.LastName)));
-
+ result.getLastName(), equalTo(record.get(Colums.LastName)));
+
assertThat("Wrong Suffix in record " + recordNum,
- parser.getSuffix(), equalTo(record.get(Colums.Suffix)));
+ result.getSuffix(), equalTo(record.get(Colums.Suffix)));
}
private enum Colums {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
new file mode 100644
index 0000000..494c70b
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code NameString}. Utilizes the same
+ * input file as the PHP library 0.2 version.
+ */
+public class NameStringTest {
+
+ private NameString nameString;
+
+ @Before
+ public void setUp() {
+ nameString = new NameString("Björn O'Malley");
+ }
+
+ @Test
+ public void testSetStrRemovesWhitespaceAtEnds() {
+ nameString.setStr(" Björn O'Malley \r\n");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testSetStrRemovesRedudentantWhitespace(){
+ nameString.setStr(" Björn O'Malley");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexReturnsChoppedSubstring(){
+ nameString.setStr("Björn O'Malley");
+ assertEquals(
+ "Björn",
+ nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsStartOffNameStr(){
+ nameString.setStr("Björn O'Malley");
+ nameString.chopWithRegex("(^[^ ]+)", 0);
+ assertEquals(
+ "O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsEndOffNameStr(){
+ nameString.setStr("Björn O'Malley");
+ nameString.chopWithRegex("( (.+)$)", 1);
+ assertEquals(
+ "Björn",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testChopWithRegexChopsMiddleFromNameStr(){
+ nameString.setStr("Björn 'Bill' O'Malley");
+ nameString.chopWithRegex("( '[^']+' )", 0);
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+ @Test
+ public void testFlip() {
+ nameString.setStr("O'Malley, Björn");
+ nameString.flip(",");
+ assertEquals(
+ "Björn O'Malley",
+ nameString.getStr()
+ );
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
deleted file mode 100644
index 7822e92..0000000
--- a/src/test/java/org/apache/commons/text/names/NameTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.names;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
- * input file as the PHP library 0.2 version.
- */
-public class NameTest {
-
- protected Name object;
-
- @Before
- public void setUp() {
- object = new Name("Björn O'Malley");
- }
-
- @Test
- public void testSetStrRemovesWhitespaceAtEnds() {
- object.setStr(" Björn O'Malley \r\n");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testSetStrRemovesRedudentantWhitespace(){
- object.setStr(" Björn O'Malley");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexReturnsChoppedSubstring(){
- object.setStr("Björn O'Malley");
- assertEquals(
- "Björn",
- object.chopWithRegex("(^([^ ]+))(.+)", 1)
- );
- }
-
- @Test
- public void testChopWithRegexChopsStartOffNameStr(){
- object.setStr("Björn O'Malley");
- object.chopWithRegex("(^[^ ]+)", 0);
- assertEquals(
- "O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexChopsEndOffNameStr(){
- object.setStr("Björn O'Malley");
- object.chopWithRegex("( (.+)$)", 1);
- assertEquals(
- "Björn",
- object.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexChopsMiddleFromNameStr(){
- object.setStr("Björn 'Bill' O'Malley");
- object.chopWithRegex("( '[^']+' )", 0);
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
- @Test
- public void testFlip() {
- object.setStr("O'Malley, Björn");
- object.flip(",");
- assertEquals(
- "Björn O'Malley",
- object.getStr()
- );
- }
-
-}
[05/13] [text] Remove state from HumanNameParser, making it immutable
Posted by ki...@apache.org.
Remove state from HumanNameParser, making it immutable
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/1f6c5dae
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/1f6c5dae
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/1f6c5dae
Branch: refs/heads/master
Commit: 1f6c5daecded67a17c07371a564f74ef623b3f29
Parents: 685f9a8
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:28:37 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:28:37 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 141 +++----------------
.../org/apache/commons/text/names/Name.java | 32 +++++
2 files changed, 51 insertions(+), 122 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1f6c5dae/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index df8e55c..c47abde 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -61,135 +61,32 @@ import org.apache.commons.lang3.StringUtils;
* <p>This implementation is based on the Java implementation, with additions
* suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
*
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
*/
public class HumanNameParser {
/**
- * Leading init part.
- */
- private String leadingInit;
- /**
- * First name.
- */
- private String first;
- /**
- * Single nickname found in the name input.
- */
- private String nickname;
- /**
- * Middle name.
- */
- private String middle;
- /**
- * Last name.
- */
- private String last;
- /**
- * Name suffix.
- */
- private String suffix;
- /**
* Suffixes found.
*/
- private List<String> suffixes;
+ private final List<String> suffixes;
/**
* Prefixes found.
*/
- private List<String> prefixes;
+ private final List<String> prefixes;
/**
* Creates a parser given a string name.
*/
public HumanNameParser() {
- this.leadingInit = "";
- this.first = "";
- this.nickname = "";
- this.middle = "";
- this.last = "";
- this.suffix = "";
-
- this.suffixes = Arrays.asList(new String[]{
+ // TODO make this configurable
+ this.suffixes = Arrays.asList(
"esq", "esquire", "jr",
- "sr", "2", "ii", "iii", "iv"});
- this.prefixes = Arrays
- .asList(new String[] {
+ "sr", "2", "ii", "iii", "iv");
+ this.prefixes = Arrays.asList(
"bar", "ben", "bin", "da", "dal",
"de la", "de", "del", "der", "di", "ibn", "la", "le",
"san", "st", "ste", "van", "van der", "van den", "vel",
- "von" });
- }
-
- /**
- * Gets the leading init part of the name.
- *
- * @return the leading init part of the name
- */
- public String getLeadingInit() {
- return leadingInit;
- }
-
- /**
- * Gets the first name.
- *
- * @return first name
- */
- public String getFirst() {
- return first;
- }
-
- /**
- * Gets the nickname.
- *
- * @return the nickname
- */
- public String getNickname() {
- return nickname;
- }
-
- /**
- * Gets the middle name.
- *
- * @return the middle name
- */
- public String getMiddle() {
- return middle;
- }
-
- /**
- * Gets the last name.
- *
- * @return the last name
- */
- public String getLast() {
- return last;
- }
-
- /**
- * Gets the suffix part of the name.
- *
- * @return the name suffix
- */
- public String getSuffix() {
- return suffix;
- }
-
- /**
- * Gets the name suffixes.
- *
- * @return the name suffixes
- */
- public List<String> getSuffixes() {
- return suffixes;
- }
-
- /**
- * Gets the name prefixes.
- *
- * @return the name prefixes
- */
- public List<String> getPrefixes() {
- return prefixes;
+ "von" );
}
/**
@@ -218,28 +115,28 @@ public class HumanNameParser {
String firstRegex = "(?i)^([^ ]+)";
// get nickname, if there is one
- this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
+ String nickname = nameString.chopWithRegex(nicknamesRegex, 2);
// get suffix, if there is one
- this.suffix = nameString.chopWithRegex(suffixRegex, 1);
+ String suffix = nameString.chopWithRegex(suffixRegex, 1);
- // flip the before-comma and after-comma parts of the nameString
+ // flip the before-comma and after-comma parts of the name
nameString.flip(",");
- // get the last nameString
- this.last = nameString.chopWithRegex(lastRegex, 0);
+ // get the last name
+ String last = nameString.chopWithRegex(lastRegex, 0);
// get the first initial, if there is one
- this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
+ String leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
- // get the first nameString
- this.first = nameString.chopWithRegex(firstRegex, 0);
- if (StringUtils.isBlank(this.first)) {
+ // get the first name
+ String first = nameString.chopWithRegex(firstRegex, 0);
+ if (StringUtils.isBlank(first)) {
throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
}
- // if anything's left, that's the middle nameString
- this.middle = nameString.getStr();
+ // if anything's left, that's the middle name
+ String middle = nameString.getStr();
return new Name(leadingInit, first, nickname, middle, last, suffix);
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/1f6c5dae/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 3067ba5..7e32de4 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -41,26 +41,58 @@ public final class Name {
this.suffix = suffix;
}
+ // TODO Add an example to each getter
+
+ /**
+ * Gets the leading initial part of the name.
+ *
+ * @return the leading initial part of the name
+ */
public String getLeadingInitial() {
return leadingInitial;
}
+ /**
+ * Gets the first name.
+ *
+ * @return first name
+ */
public String getFirstName() {
return firstName;
}
+ /**
+ * Gets the nickname.
+ *
+ * @return the nickname
+ */
public String getNickName() {
return nickName;
}
+ /**
+ * Gets the middle name.
+ *
+ * @return the middle name
+ */
public String getMiddleName() {
return middleName;
}
+ /**
+ * Gets the last name.
+ *
+ * @return the last name
+ */
public String getLastName() {
return lastName;
}
+ /**
+ * Gets the suffix part of the name.
+ *
+ * @return the name suffix
+ */
public String getSuffix() {
return suffix;
}
[12/13] [text] Add SANDBOX-498 to the list of fixed issues
Posted by ki...@apache.org.
Add SANDBOX-498 to the list of fixed issues
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/c1372c1f
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/c1372c1f
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/c1372c1f
Branch: refs/heads/master
Commit: c1372c1f9754434995c9a91fe47508946ff5744f
Parents: 6d047a4
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 17:14:22 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 17:14:22 2015 +0200
----------------------------------------------------------------------
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c1372c1f/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index fbb60b9..0a77677 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
<body>
<release version="1.0" date="tba" description="tba">
+ <action issue="SANDBOX-498" type="update" dev="britter">Improve HumanNameParser</action>
<action issue="SANDBOX-497" type="fix" dev="kinow">IP clearance for the names package</action>
<action issue="SANDBOX-496" type="add" dev="kinow">Write user guide</action>
<action issue="SANDBOX-488" type="fix" dev="kinow">Work on the string metric, distance, and similarity definitions for the project</action>
[13/13] [text] Merge remote-tracking branch
'remotes/origin/SANDBOX-498' for issue SANDBOX-498
Posted by ki...@apache.org.
Merge remote-tracking branch 'remotes/origin/SANDBOX-498' for issue SANDBOX-498
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/bf8bfb0a
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/bf8bfb0a
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/bf8bfb0a
Branch: refs/heads/master
Commit: bf8bfb0a46c0e6d7f9e3d3416bf2f147c9b81074
Parents: e8e85d9 c1372c1
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Mon Apr 20 14:58:27 2015 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Mon Apr 20 14:58:27 2015 +1200
----------------------------------------------------------------------
src/changes/changes.xml | 1 +
.../commons/text/names/HumanNameParser.java | 279 +++++++------------
.../org/apache/commons/text/names/Name.java | 141 +++++-----
.../commons/text/names/NameParseException.java | 2 +-
.../apache/commons/text/names/NameString.java | 122 ++++++++
.../commons/text/names/HumanNameParserTest.java | 43 +--
.../commons/text/names/NameStringTest.java | 77 +++++
.../org/apache/commons/text/names/NameTest.java | 104 -------
8 files changed, 381 insertions(+), 388 deletions(-)
----------------------------------------------------------------------
[08/13] [text] Make classes in the name package final.
Posted by ki...@apache.org.
Make classes in the name package final.
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/9e340643
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/9e340643
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/9e340643
Branch: refs/heads/master
Commit: 9e340643cfebd7b4088fd9946b3e92fc9f8cd394
Parents: a942b4c
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:32:31 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:32:31 2015 +0200
----------------------------------------------------------------------
src/main/java/org/apache/commons/text/names/HumanNameParser.java | 2 +-
.../java/org/apache/commons/text/names/NameParseException.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/9e340643/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index c47abde..a29e375 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -63,7 +63,7 @@ import org.apache.commons.lang3.StringUtils;
*
* <p>This class is immutable.</p>
*/
-public class HumanNameParser {
+public final class HumanNameParser {
/**
* Suffixes found.
http://git-wip-us.apache.org/repos/asf/commons-text/blob/9e340643/src/main/java/org/apache/commons/text/names/NameParseException.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameParseException.java b/src/main/java/org/apache/commons/text/names/NameParseException.java
index b09c2d6..4fe5eda 100644
--- a/src/main/java/org/apache/commons/text/names/NameParseException.java
+++ b/src/main/java/org/apache/commons/text/names/NameParseException.java
@@ -19,7 +19,7 @@ package org.apache.commons.text.names;
/**
* Name parse exception.
*/
-public class NameParseException extends RuntimeException {
+public final class NameParseException extends RuntimeException {
/**
* Serial UID.
[07/13] [text] Fix typo
Posted by ki...@apache.org.
Fix typo
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a942b4c0
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a942b4c0
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a942b4c0
Branch: refs/heads/master
Commit: a942b4c02194a6f544f129e89e0f399d51c5c01a
Parents: bbba0a3
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:31:01 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:31:01 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParserTest.java | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a942b4c0/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 314a949..f6c9ba6 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,29 +71,29 @@ public class HumanNameParserTest {
* @param record a CSVRecord representing one record in the input file.
*/
private void validateRecord(CSVRecord record) {
- Name result = nameParser.parse(record.get(Colums.Name));
+ Name result = nameParser.parse(record.get(Columns.Name));
long recordNum = record.getRecordNumber();
assertThat("Wrong LeadingInit in record " + recordNum,
- result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+ result.getLeadingInitial(), equalTo(record.get(Columns.LeadingInit)));
assertThat("Wrong FirstName in record " + recordNum,
- result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+ result.getFirstName(), equalTo(record.get(Columns.FirstName)));
assertThat("Wrong NickName in record " + recordNum,
- result.getNickName(), equalTo(record.get(Colums.NickName)));
+ result.getNickName(), equalTo(record.get(Columns.NickName)));
assertThat("Wrong MiddleName in record " + recordNum,
- result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+ result.getMiddleName(), equalTo(record.get(Columns.MiddleName)));
assertThat("Wrong LastName in record " + recordNum,
- result.getLastName(), equalTo(record.get(Colums.LastName)));
+ result.getLastName(), equalTo(record.get(Columns.LastName)));
assertThat("Wrong Suffix in record " + recordNum,
- result.getSuffix(), equalTo(record.get(Colums.Suffix)));
+ result.getSuffix(), equalTo(record.get(Columns.Suffix)));
}
- private enum Colums {
+ private enum Columns {
Name,LeadingInit,FirstName,NickName,MiddleName,LastName,Suffix
}
}
[09/13] [text] Drop unused code from NameString and clean up
NameStringTest
Posted by ki...@apache.org.
Drop unused code from NameString and clean up NameStringTest
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/ed985cd5
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/ed985cd5
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/ed985cd5
Branch: refs/heads/master
Commit: ed985cd51220e956f516acecf1039defd0141d34
Parents: 9e34064
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:44:32 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:44:32 2015 +0200
----------------------------------------------------------------------
.../commons/text/names/HumanNameParser.java | 5 +-
.../apache/commons/text/names/NameString.java | 24 ++-----
.../commons/text/names/NameStringTest.java | 67 ++++++--------------
3 files changed, 30 insertions(+), 66 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index a29e375..b5c0aa3 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -100,6 +100,7 @@ public final class HumanNameParser {
Objects.requireNonNull(name, "Parameter 'name' must not be null.");
NameString nameString = new NameString(name);
+ // TODO compile regexes only once when the parser is created
String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
String prefixes = StringUtils.join(this.prefixes, " |") + " ";
@@ -132,11 +133,11 @@ public final class HumanNameParser {
// get the first name
String first = nameString.chopWithRegex(firstRegex, 0);
if (StringUtils.isBlank(first)) {
- throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
+ throw new NameParseException("Couldn't find a first name in '{" + nameString.getWrappedString() + "}'");
}
// if anything's left, that's the middle name
- String middle = nameString.getStr();
+ String middle = nameString.getWrappedString();
return new Name(leadingInit, first, nickname, middle, last, suffix);
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
index 8f606f2..54e2753 100644
--- a/src/main/java/org/apache/commons/text/names/NameString.java
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -37,30 +37,20 @@ final class NameString {
*
* @param str encapsulated string.
*/
- public NameString(String str) {
+ NameString(String str) {
this.str = str;
}
/**
- * Gets the encapsulated string.
+ * Gets the wrapped string.
*
- * @return encapsulated string
+ * @return wrapped string
*/
- public String getStr() {
+ String getWrappedString() {
return str;
}
/**
- * Sets the encapsulated string value.
- *
- * @param str string value
- */
- public void setStr(String str) {
- this.str = str;
- this.norm();
- }
-
- /**
* Uses a regex to chop off and return part of the namestring.
* There are two parts: first, it returns the matched substring,
* and then it removes that substring from the encapsulated
@@ -70,7 +60,7 @@ final class NameString {
* @param submatchIndex which of the parenthesized submatches to use
* @return the part of the namestring that got chopped off
*/
- public String chopWithRegex(String regex, int submatchIndex) {
+ String chopWithRegex(String regex, int submatchIndex) {
String chopped = "";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(this.str);
@@ -106,7 +96,7 @@ final class NameString {
* @param flipAroundChar the character(s) demarcating the two halves you want to flip.
* @throws NameParseException if a regex fails or a condition is not expected
*/
- public void flip(String flipAroundChar) {
+ void flip(String flipAroundChar) {
String[] parts = this.str.split(flipAroundChar);
if (parts != null) {
if (parts.length == 2) {
@@ -125,7 +115,7 @@ final class NameString {
* <p>Strips whitespace chars from ends, strips redundant whitespace, converts
* whitespace chars to " ".</p>
*/
- public void norm() {
+ private void norm() {
this.str = this.str.trim();
this.str = this.str.replaceAll("\\s+", " ");
this.str = this.str.replaceAll(",$", " ");
http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
index 494c70b..e087579 100644
--- a/src/test/java/org/apache/commons/text/names/NameStringTest.java
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -16,7 +16,8 @@
*/
package org.apache.commons.text.names;
-import static org.junit.Assert.assertEquals;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertThat;
import org.junit.Before;
import org.junit.Test;
@@ -35,70 +36,42 @@ public class NameStringTest {
}
@Test
- public void testSetStrRemovesWhitespaceAtEnds() {
- nameString.setStr(" Björn O'Malley \r\n");
- assertEquals(
- "Björn O'Malley",
- nameString.getStr()
- );
- }
+ public void testChopWithRegexReturnsChoppedSubstring() {
+ NameString nameString = new NameString("Björn O'Malley");
- @Test
- public void testSetStrRemovesRedudentantWhitespace(){
- nameString.setStr(" Björn O'Malley");
- assertEquals(
- "Björn O'Malley",
- nameString.getStr()
- );
- }
-
- @Test
- public void testChopWithRegexReturnsChoppedSubstring(){
- nameString.setStr("Björn O'Malley");
- assertEquals(
- "Björn",
- nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
- );
+ assertThat(nameString.chopWithRegex("(^([^ ]+))(.+)", 1), equalTo("Björn"));
}
@Test
- public void testChopWithRegexChopsStartOffNameStr(){
- nameString.setStr("Björn O'Malley");
+ public void testChopWithRegexChopsStartOffNameStr() {
+ NameString nameString = new NameString("Björn O'Malley");
nameString.chopWithRegex("(^[^ ]+)", 0);
- assertEquals(
- "O'Malley",
- nameString.getStr()
- );
+
+ assertThat(nameString.getWrappedString(), equalTo("O'Malley"));
}
@Test
- public void testChopWithRegexChopsEndOffNameStr(){
- nameString.setStr("Björn O'Malley");
+ public void testChopWithRegexChopsEndOffNameStr() {
+ NameString nameString = new NameString("Björn O'Malley");
nameString.chopWithRegex("( (.+)$)", 1);
- assertEquals(
- "Björn",
- nameString.getStr()
- );
+
+ assertThat(nameString.getWrappedString(), equalTo("Björn"));
}
@Test
- public void testChopWithRegexChopsMiddleFromNameStr(){
- nameString.setStr("Björn 'Bill' O'Malley");
+ public void testChopWithRegexChopsMiddleFromNameStr() {
+ NameString nameString = new NameString("Björn 'Bill' O'Malley");
nameString.chopWithRegex("( '[^']+' )", 0);
- assertEquals(
- "Björn O'Malley",
- nameString.getStr()
- );
+
+ assertThat(nameString.getWrappedString(), equalTo("Björn O'Malley"));
}
@Test
public void testFlip() {
- nameString.setStr("O'Malley, Björn");
+ NameString nameString = new NameString("O'Malley, Björn");
nameString.flip(",");
- assertEquals(
- "Björn O'Malley",
- nameString.getStr()
- );
+
+ assertThat(nameString.getWrappedString(), equalTo("Björn O'Malley"));
}
}