You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by br...@apache.org on 2015/04/19 17:14:50 UTC

[01/12] [text] Make parse method public

Repository: commons-text
Updated Branches:
  refs/heads/SANDBOX-498 [created] c1372c1f9


Make parse method public


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/aa293500
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/aa293500
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/aa293500

Branch: refs/heads/SANDBOX-498
Commit: aa293500080d6872b3ac653dcf74a50cf8223ae5
Parents: e8e85d9
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 15:58:16 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 15:58:16 2015 +0200

----------------------------------------------------------------------
 src/main/java/org/apache/commons/text/names/HumanNameParser.java | 4 +---
 .../java/org/apache/commons/text/names/HumanNameParserTest.java  | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/aa293500/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index 843685a..5088bba 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -134,8 +134,6 @@ public class HumanNameParser {
                     "de la", "de", "del", "der", "di", "ibn", "la", "le",
                     "san", "st", "ste", "van", "van der", "van den", "vel",
                     "von" });
-
-        this.parse();
     }
 
     /**
@@ -224,7 +222,7 @@ public class HumanNameParser {
      *
      * @throws NameParseException if the parser fails to retrieve the name parts
      */
-    private void parse() {
+    public void parse() {
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/aa293500/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 90e1dfa..5ff7805 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -65,6 +65,7 @@ public class HumanNameParserTest {
      */
     private void validateRecord(CSVRecord record) {
         HumanNameParser parser = new HumanNameParser(record.get(Colums.Name));
+        parser.parse();
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,


[03/12] [text] Check for null inputs

Posted by br...@apache.org.
Check for null inputs


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/9a0cc85a
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/9a0cc85a
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/9a0cc85a

Branch: refs/heads/SANDBOX-498
Commit: 9a0cc85ad01dcf1f468736984cdd5dec0a7a3bf3
Parents: df7e7a7
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:06:09 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:06:09 2015 +0200

----------------------------------------------------------------------
 .../java/org/apache/commons/text/names/HumanNameParser.java  | 8 ++++++--
 .../org/apache/commons/text/names/HumanNameParserTest.java   | 6 ++++++
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/9a0cc85a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index bf8f9ed..fa2433a 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -19,6 +19,7 @@ package org.apache.commons.text.names;
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.Objects;
 
 import org.apache.commons.lang3.StringUtils;
 
@@ -194,10 +195,13 @@ public class HumanNameParser {
     /**
      * Consumes the string and creates the name parts.
      *
-     * @param nameStr the name to parse.
-     * @throws NameParseException if the parser fails to retrieve the name parts
+     * @param nameStr the name to parse. Must not be null.
+     * @throws NameParseException if the parser fails to retrieve the name parts.
+     * @throws NullPointerException if nameStr is null.
      */
     public void parse(String nameStr) {
+        Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+
         Name name = new Name(nameStr);
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";

http://git-wip-us.apache.org/repos/asf/commons-text/blob/9a0cc85a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 478d19c..d43d2be 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -50,6 +50,12 @@ public class HumanNameParserTest {
         }
     }
 
+    @Test(expected = NullPointerException.class)
+    public void shouldThrowNullPointerException_WhenNullIsParsed() throws Exception {
+        HumanNameParser parser = new HumanNameParser();
+        parser.parse(null);
+    }
+
     @Test
     public void testInputs() {
         for (CSVRecord record : parser) {


[02/12] [text] Pass the name to parse as parameter to the parse method

Posted by br...@apache.org.
Pass the name to parse as parameter to the parse method


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/df7e7a7b
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/df7e7a7b
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/df7e7a7b

Branch: refs/heads/SANDBOX-498
Commit: df7e7a7b0aba73a1bf09c41dbd32e913252a8707
Parents: aa29350
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:02:55 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:02:55 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     | 52 ++++++--------------
 .../commons/text/names/HumanNameParserTest.java |  4 +-
 2 files changed, 16 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/df7e7a7b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index 5088bba..bf8f9ed 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -65,10 +65,6 @@ import org.apache.commons.lang3.StringUtils;
 public class HumanNameParser {
 
     /**
-     * Name parsed.
-     */
-    private Name name;
-    /**
      * Leading init part.
      */
     private String leadingInit;
@@ -103,21 +99,8 @@ public class HumanNameParser {
 
     /**
      * Creates a parser given a string name.
-     *
-     * @param name string name
-     */
-    public HumanNameParser(String name) {
-        this(new Name(name));
-    }
-
-    /**
-     * Creates a parser given a {@code Name} object.
-     *
-     * @param name {@code Name}
      */
-    public HumanNameParser(Name name) {
-        this.name = name;
-
+    public HumanNameParser() {
         this.leadingInit = "";
         this.first = "";
         this.nickname = "";
@@ -125,9 +108,9 @@ public class HumanNameParser {
         this.last = "";
         this.suffix = "";
 
-        this.suffixes = Arrays.asList(new String[] {
+        this.suffixes = Arrays.asList(new String[]{
                 "esq", "esquire", "jr",
-                "sr", "2", "ii", "iii", "iv" });
+                "sr", "2", "ii", "iii", "iv"});
         this.prefixes = Arrays
             .asList(new String[] {
                     "bar", "ben", "bin", "da", "dal",
@@ -137,15 +120,6 @@ public class HumanNameParser {
     }
 
     /**
-     * Gets the {@code Name} object.
-     *
-     * @return the {@code Name} object
-     */
-    public Name getName() {
-        return name;
-    }
-
-    /**
      * Gets the leading init part of the name.
      *
      * @return the leading init part of the name
@@ -220,9 +194,11 @@ public class HumanNameParser {
     /**
      * Consumes the string and creates the name parts.
      *
+     * @param nameStr the name to parse.
      * @throws NameParseException if the parser fails to retrieve the name parts
      */
-    public void parse() {
+    public void parse(String nameStr) {
+        Name name = new Name(nameStr);
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 
@@ -238,28 +214,28 @@ public class HumanNameParser {
         String firstRegex = "(?i)^([^ ]+)";
 
         // get nickname, if there is one
-        this.nickname = this.name.chopWithRegex(nicknamesRegex, 2);
+        this.nickname = name.chopWithRegex(nicknamesRegex, 2);
 
         // get suffix, if there is one
-        this.suffix = this.name.chopWithRegex(suffixRegex, 1);
+        this.suffix = name.chopWithRegex(suffixRegex, 1);
 
         // flip the before-comma and after-comma parts of the name
-        this.name.flip(",");
+        name.flip(",");
 
         // get the last name
-        this.last = this.name.chopWithRegex(lastRegex, 0);
+        this.last = name.chopWithRegex(lastRegex, 0);
 
         // get the first initial, if there is one
-        this.leadingInit = this.name.chopWithRegex(leadingInitRegex, 1);
+        this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
 
         // get the first name
-        this.first = this.name.chopWithRegex(firstRegex, 0);
+        this.first = name.chopWithRegex(firstRegex, 0);
         if (StringUtils.isBlank(this.first)) {
-            throw new NameParseException("Couldn't find a first name in '{" + this.name.getStr() + "}'");
+            throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
         }
 
         // if anything's left, that's the middle name
-        this.middle = this.name.getStr();
+        this.middle = name.getStr();
     }
 
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df7e7a7b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 5ff7805..478d19c 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -64,8 +64,8 @@ public class HumanNameParserTest {
      * @param record a CSVRecord representing one record in the input file.
      */
     private void validateRecord(CSVRecord record) {
-        HumanNameParser parser = new HumanNameParser(record.get(Colums.Name));
-        parser.parse();
+        HumanNameParser parser = new HumanNameParser();
+        parser.parse(record.get(Colums.Name));
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,


[08/12] [text] Make classes in the name package final.

Posted by br...@apache.org.
Make classes in the name package final.


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/9e340643
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/9e340643
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/9e340643

Branch: refs/heads/SANDBOX-498
Commit: 9e340643cfebd7b4088fd9946b3e92fc9f8cd394
Parents: a942b4c
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:32:31 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:32:31 2015 +0200

----------------------------------------------------------------------
 src/main/java/org/apache/commons/text/names/HumanNameParser.java   | 2 +-
 .../java/org/apache/commons/text/names/NameParseException.java     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/9e340643/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index c47abde..a29e375 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -63,7 +63,7 @@ import org.apache.commons.lang3.StringUtils;
  *
  * <p>This class is immutable.</p>
  */
-public class HumanNameParser {
+public final class HumanNameParser {
 
     /**
      * Suffixes found.

http://git-wip-us.apache.org/repos/asf/commons-text/blob/9e340643/src/main/java/org/apache/commons/text/names/NameParseException.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameParseException.java b/src/main/java/org/apache/commons/text/names/NameParseException.java
index b09c2d6..4fe5eda 100644
--- a/src/main/java/org/apache/commons/text/names/NameParseException.java
+++ b/src/main/java/org/apache/commons/text/names/NameParseException.java
@@ -19,7 +19,7 @@ package org.apache.commons.text.names;
 /**
  * Name parse exception.
  */
-public class NameParseException extends RuntimeException {
+public final class NameParseException extends RuntimeException {
 
     /**
      * Serial UID.


[04/12] [text] Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.

Posted by br...@apache.org.
Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString.


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/685f9a86
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/685f9a86
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/685f9a86

Branch: refs/heads/SANDBOX-498
Commit: 685f9a864d46cc526b14e3a7476465c49d991478
Parents: 9a0cc85
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:22:45 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:22:45 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     |  36 ++---
 .../org/apache/commons/text/names/Name.java     | 141 ++++++-------------
 .../apache/commons/text/names/NameString.java   | 134 ++++++++++++++++++
 .../commons/text/names/HumanNameParserTest.java |  24 ++--
 .../commons/text/names/NameStringTest.java      | 104 ++++++++++++++
 .../org/apache/commons/text/names/NameTest.java | 104 --------------
 6 files changed, 315 insertions(+), 228 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index fa2433a..df8e55c 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -195,14 +195,14 @@ public class HumanNameParser {
     /**
      * Consumes the string and creates the name parts.
      *
-     * @param nameStr the name to parse. Must not be null.
+     * @param name the name to parse. Must not be null.
      * @throws NameParseException if the parser fails to retrieve the name parts.
-     * @throws NullPointerException if nameStr is null.
+     * @throws NullPointerException if name is null.
      */
-    public void parse(String nameStr) {
-        Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null.");
+    public Name parse(String name) {
+        Objects.requireNonNull(name, "Parameter 'name' must not be null.");
 
-        Name name = new Name(nameStr);
+        NameString nameString = new NameString(name);
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 
@@ -218,28 +218,30 @@ public class HumanNameParser {
         String firstRegex = "(?i)^([^ ]+)";
 
         // get nickname, if there is one
-        this.nickname = name.chopWithRegex(nicknamesRegex, 2);
+        this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
 
         // get suffix, if there is one
-        this.suffix = name.chopWithRegex(suffixRegex, 1);
+        this.suffix = nameString.chopWithRegex(suffixRegex, 1);
 
-        // flip the before-comma and after-comma parts of the name
-        name.flip(",");
+        // flip the before-comma and after-comma parts of the nameString
+        nameString.flip(",");
 
-        // get the last name
-        this.last = name.chopWithRegex(lastRegex, 0);
+        // get the last name
+        this.last = nameString.chopWithRegex(lastRegex, 0);
 
         // get the first initial, if there is one
-        this.leadingInit = name.chopWithRegex(leadingInitRegex, 1);
+        this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
 
-        // get the first name
-        this.first = name.chopWithRegex(firstRegex, 0);
+        // get the first name
+        this.first = nameString.chopWithRegex(firstRegex, 0);
         if (StringUtils.isBlank(this.first)) {
-            throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'");
+            throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
         }
 
-        // if anything's left, that's the middle name
-        this.middle = name.getStr();
+        // if anything's left, that's the middle name
+        this.middle = nameString.getStr();
+        
+        return new Name(leadingInit, first, nickname, middle, last, suffix);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 0dd2560..3067ba5 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -16,119 +16,70 @@
  */
 package org.apache.commons.text.names;
 
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import java.util.Objects;
 
 /**
- * <p>A {@code Name} object that encapsulates a name string, and contains the logic
- * for handling with Regexes.</p>
+ * An object representing the result of parsing a Name.
  *
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
  */
-public class Name {
+public final class Name {
 
-    /**
-     * Encapsulated string. Not immutable!
-     */
-    private String str;
+    private final String leadingInitial;
+    private final String firstName;
+    private final String nickName;
+    private final String middleName;
+    private final String lastName;
+    private final String suffix;
 
-    /**
-     * Creates a new Name object.
-     *
-     * @param str encapsulated string.
-     */
-    public Name(String str) {
-        this.str = str;
+    Name(String leadingInitial, String firstName, String nickName, String middleName, String lastName, String suffix) {
+        this.leadingInitial = leadingInitial;
+        this.firstName = firstName;
+        this.nickName = nickName;
+        this.middleName = middleName;
+        this.lastName = lastName;
+        this.suffix = suffix;
     }
 
-    /**
-     * Gets the encapsulated string.
-     *
-     * @return encapsulated string
-     */
-    public String getStr() {
-        return str;
+    public String getLeadingInitial() {
+        return leadingInitial;
     }
 
-    /**
-     * Sets the encapsulated string value.
-     *
-     * @param str string value
-     */
-    public void setStr(String str) {
-        this.str = str;
-        this.norm();
+    public String getFirstName() {
+        return firstName;
     }
 
-    /**
-     * Uses a regex to chop off and return part of the namestring.
-     * There are two parts: first, it returns the matched substring,
-     * and then it removes that substring from the encapsulated
-     * string and normalizes it.
-     *
-     * @param regex matches the part of the namestring to chop off
-     * @param submatchIndex which of the parenthesized submatches to use
-     * @return the part of the namestring that got chopped off
-     */
-    public String chopWithRegex(String regex, int submatchIndex) {
-        String chopped = "";
-        Pattern pattern = Pattern.compile(regex);
-        Matcher matcher = pattern.matcher(this.str);
+    public String getNickName() {
+        return nickName;
+    }
 
-        // workdaround for numReplacements in Java
-        int numReplacements = 0;
-        while (matcher.find()) {
-            numReplacements++;
-        }
+    public String getMiddleName() {
+        return middleName;
+    }
 
-        // recreate or the groups are gone
-        pattern = Pattern.compile(regex);
-        matcher = pattern.matcher(this.str);
-        if (matcher.find()) {
-            boolean subset = matcher.groupCount() > submatchIndex;
-            if (subset) {
-                this.str = this.str.replaceAll(regex, " ");
-                if (numReplacements > 1) {
-                    throw new NameParseException("The regex being used to find the name has multiple matches.");
-                }
-                this.norm();
-                return matcher.group(submatchIndex).trim();
-            }
-        }
-        return chopped;
+    public String getLastName() {
+        return lastName;
     }
 
-    /**
-     * Flips the front and back parts of a name with one another.
-     * Front and back are determined by a specified character somewhere in the
-     * middle of the string.
-     *
-     * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
-     * @throws NameParseException if a regex fails or a condition is not expected
-     */
-    public void flip(String flipAroundChar) {
-        String[] parts = this.str.split(flipAroundChar);
-        if (parts != null) {
-            if (parts.length == 2) {
-                this.str = String.format("%s %s", parts[1], parts[0]);
-                this.norm();
-            } else if (parts.length > 2) {
-                throw new NameParseException(
-                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
-            }
-        }
+    public String getSuffix() {
+        return suffix;
     }
 
-    /**
-     * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
-     *
-     * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
-     * whitespace chars to " ".</p>
-     */
-    public void norm() {
-        this.str = this.str.trim();
-        this.str = this.str.replaceAll("\\s+", " ");
-        this.str = this.str.replaceAll(",$", " ");
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        Name name = (Name) o;
+        return Objects.equals(leadingInitial, name.leadingInitial) &&
+                Objects.equals(firstName, name.firstName) &&
+                Objects.equals(nickName, name.nickName) &&
+                Objects.equals(middleName, name.middleName) &&
+                Objects.equals(lastName, name.lastName) &&
+                Objects.equals(suffix, name.suffix);
     }
 
+    @Override
+    public int hashCode() {
+        return Objects.hash(leadingInitial, firstName, nickName, middleName, lastName, suffix);
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
new file mode 100644
index 0000000..8f606f2
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A wrapper around a String representing a Name to parse. Contains the logic
+ * for executing regexes on the wrapped name string.
+ *
+ * <p>This class is not thread-safe.</p>
+ */
+final class NameString {
+
+    /**
+     * Encapsulated string. Not immutable!
+     */
+    private String str;
+
+    /**
+     * Creates a new NameString object.
+     *
+     * @param str encapsulated string.
+     */
+    public NameString(String str) {
+        this.str = str;
+    }
+
+    /**
+     * Gets the encapsulated string.
+     *
+     * @return encapsulated string
+     */
+    public String getStr() {
+        return str;
+    }
+
+    /**
+     * Sets the encapsulated string value.
+     *
+     * @param str string value
+     */
+    public void setStr(String str) {
+        this.str = str;
+        this.norm();
+    }
+
+    /**
+     * Uses a regex to chop off and return part of the namestring.
+     * There are two parts: first, it returns the matched substring,
+     * and then it removes that substring from the encapsulated
+     * string and normalizes it.
+     *
+     * @param regex matches the part of the namestring to chop off
+     * @param submatchIndex which of the parenthesized submatches to use
+     * @return the part of the namestring that got chopped off
+     */
+    public String chopWithRegex(String regex, int submatchIndex) {
+        String chopped = "";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(this.str);
+
+        // workaround for numReplacements in Java
+        int numReplacements = 0;
+        while (matcher.find()) {
+            numReplacements++;
+        }
+
+        // recreate or the groups are gone
+        pattern = Pattern.compile(regex);
+        matcher = pattern.matcher(this.str);
+        if (matcher.find()) {
+            boolean subset = matcher.groupCount() > submatchIndex;
+            if (subset) {
+                this.str = this.str.replaceAll(regex, " ");
+                if (numReplacements > 1) {
+                    throw new NameParseException("The regex being used to find the name has multiple matches.");
+                }
+                this.norm();
+                return matcher.group(submatchIndex).trim();
+            }
+        }
+        return chopped;
+    }
+
+    /**
+     * Flips the front and back parts of a name with one another.
+     * Front and back are determined by a specified character somewhere in the
+     * middle of the string.
+     *
+     * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
+     * @throws NameParseException if splitting on {@code flipAroundChar} yields more than two parts
+     */
+    public void flip(String flipAroundChar) {
+        String[] parts = this.str.split(flipAroundChar);
+        if (parts != null) {
+            if (parts.length == 2) {
+                this.str = String.format("%s %s", parts[1], parts[0]);
+                this.norm();
+            } else if (parts.length > 2) {
+                throw new NameParseException(
+                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+            }
+        }
+    }
+
+    /**
+     * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
+     *
+     * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
+     * whitespace chars to " ".</p>
+     */
+    public void norm() {
+        this.str = this.str.trim();
+        this.str = this.str.replaceAll("\\s+", " ");
+        this.str = this.str.replaceAll(",$", " ");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d43d2be..d059ed4 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,26 +71,26 @@ public class HumanNameParserTest {
      */
     private void validateRecord(CSVRecord record) {
         HumanNameParser parser = new HumanNameParser();
-        parser.parse(record.get(Colums.Name));
+        Name result = parser.parse(record.get(Colums.Name));
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,
-                parser.getLeadingInit(), equalTo(record.get(Colums.LeadingInit)));
-        
+                result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+
         assertThat("Wrong FirstName in record " + recordNum,
-                parser.getFirst(), equalTo(record.get(Colums.FirstName)));
-        
+                result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+
         assertThat("Wrong NickName in record " + recordNum,
-                parser.getNickname(), equalTo(record.get(Colums.NickName)));
-        
+                result.getNickName(), equalTo(record.get(Colums.NickName)));
+
         assertThat("Wrong MiddleName in record " + recordNum,
-                parser.getMiddle(), equalTo(record.get(Colums.MiddleName)));
-        
+                result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+
         assertThat("Wrong LastName in record " + recordNum,
-                parser.getLast(), equalTo(record.get(Colums.LastName)));
-        
+                result.getLastName(), equalTo(record.get(Colums.LastName)));
+
         assertThat("Wrong Suffix in record " + recordNum,
-                parser.getSuffix(), equalTo(record.get(Colums.Suffix)));
+                result.getSuffix(), equalTo(record.get(Colums.Suffix)));
     }
 
     private enum Colums {

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
new file mode 100644
index 0000000..494c70b
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
+ * input file as the PHP library 0.2 version.
+ */
+public class NameStringTest {
+
+    private NameString nameString;
+
+    @Before
+    public void setUp() {
+        nameString = new NameString("Björn O'Malley");
+    }
+
+    @Test
+    public void testSetStrRemovesWhitespaceAtEnds() {
+        nameString.setStr("    Björn O'Malley \r\n");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testSetStrRemovesRedudentantWhitespace(){
+        nameString.setStr(" Björn    O'Malley");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexReturnsChoppedSubstring(){
+        nameString.setStr("Björn O'Malley");
+        assertEquals(
+            "Björn",
+            nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsStartOffNameStr(){
+        nameString.setStr("Björn O'Malley");
+        nameString.chopWithRegex("(^[^ ]+)", 0);
+        assertEquals(
+                "O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsEndOffNameStr(){
+        nameString.setStr("Björn O'Malley");
+        nameString.chopWithRegex("( (.+)$)", 1);
+        assertEquals(
+            "Björn",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsMiddleFromNameStr(){
+        nameString.setStr("Björn 'Bill' O'Malley");
+        nameString.chopWithRegex("( '[^']+' )", 0);
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+    @Test
+    public void testFlip() {
+        nameString.setStr("O'Malley, Björn");
+        nameString.flip(",");
+        assertEquals(
+            "Björn O'Malley",
+            nameString.getStr()
+        );
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
deleted file mode 100644
index 7822e92..0000000
--- a/src/test/java/org/apache/commons/text/names/NameTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.names;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
- * input file as the PHP library 0.2 version.
- */
-public class NameTest {
-
-    protected Name object;
-
-    @Before
-    public void setUp() {
-        object = new Name("Björn O'Malley");
-    }
-
-    @Test
-    public void testSetStrRemovesWhitespaceAtEnds() {
-        object.setStr("    Björn O'Malley \r\n");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testSetStrRemovesRedudentantWhitespace(){
-        object.setStr(" Björn    O'Malley");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexReturnsChoppedSubstring(){
-        object.setStr("Björn O'Malley");
-        assertEquals(
-            "Björn",
-            object.chopWithRegex("(^([^ ]+))(.+)", 1)
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsStartOffNameStr(){
-        object.setStr("Björn O'Malley");
-        object.chopWithRegex("(^[^ ]+)", 0);
-        assertEquals(
-                "O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsEndOffNameStr(){
-        object.setStr("Björn O'Malley");
-        object.chopWithRegex("( (.+)$)", 1);
-        assertEquals(
-            "Björn",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexChopsMiddleFromNameStr(){
-        object.setStr("Björn 'Bill' O'Malley");
-        object.chopWithRegex("( '[^']+' )", 0);
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-    @Test
-    public void testFlip() {
-        object.setStr("O'Malley, Björn");
-        object.flip(",");
-        assertEquals(
-            "Björn O'Malley",
-            object.getStr()
-        );
-    }
-
-}


[07/12] [text] Fix typo

Posted by br...@apache.org.
Fix typo


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a942b4c0
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a942b4c0
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a942b4c0

Branch: refs/heads/SANDBOX-498
Commit: a942b4c02194a6f544f129e89e0f399d51c5c01a
Parents: bbba0a3
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:31:01 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:31:01 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParserTest.java     | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/a942b4c0/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index 314a949..f6c9ba6 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -71,29 +71,29 @@ public class HumanNameParserTest {
      * @param record a CSVRecord representing one record in the input file.
      */
     private void validateRecord(CSVRecord record) {
-        Name result = nameParser.parse(record.get(Colums.Name));
+        Name result = nameParser.parse(record.get(Columns.Name));
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,
-                result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit)));
+                result.getLeadingInitial(), equalTo(record.get(Columns.LeadingInit)));
 
         assertThat("Wrong FirstName in record " + recordNum,
-                result.getFirstName(), equalTo(record.get(Colums.FirstName)));
+                result.getFirstName(), equalTo(record.get(Columns.FirstName)));
 
         assertThat("Wrong NickName in record " + recordNum,
-                result.getNickName(), equalTo(record.get(Colums.NickName)));
+                result.getNickName(), equalTo(record.get(Columns.NickName)));
 
         assertThat("Wrong MiddleName in record " + recordNum,
-                result.getMiddleName(), equalTo(record.get(Colums.MiddleName)));
+                result.getMiddleName(), equalTo(record.get(Columns.MiddleName)));
 
         assertThat("Wrong LastName in record " + recordNum,
-                result.getLastName(), equalTo(record.get(Colums.LastName)));
+                result.getLastName(), equalTo(record.get(Columns.LastName)));
 
         assertThat("Wrong Suffix in record " + recordNum,
-                result.getSuffix(), equalTo(record.get(Colums.Suffix)));
+                result.getSuffix(), equalTo(record.get(Columns.Suffix)));
     }
 
-    private enum Colums {
+    private enum Columns {
         Name,LeadingInit,FirstName,NickName,MiddleName,LastName,Suffix
     }
 }


[10/12] [text] Condition will always be true

Posted by br...@apache.org.
Condition will always be true


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/b1c7e564
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/b1c7e564
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/b1c7e564

Branch: refs/heads/SANDBOX-498
Commit: b1c7e564251e7a404aa3d021c282349150fd4061
Parents: ed985cd
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:45:49 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:45:49 2015 +0200

----------------------------------------------------------------------
 .../org/apache/commons/text/names/NameString.java     | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/b1c7e564/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
index 54e2753..21898d3 100644
--- a/src/main/java/org/apache/commons/text/names/NameString.java
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -98,14 +98,12 @@ final class NameString {
      */
     void flip(String flipAroundChar) {
         String[] parts = this.str.split(flipAroundChar);
-        if (parts != null) {
-            if (parts.length == 2) {
-                this.str = String.format("%s %s", parts[1], parts[0]);
-                this.norm();
-            } else if (parts.length > 2) {
-                throw new NameParseException(
-                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
-            }
+        if (parts.length == 2) {
+            this.str = String.format("%s %s", parts[1], parts[0]);
+            this.norm();
+        } else if (parts.length > 2) {
+            throw new NameParseException(
+                    "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
         }
     }
 


[05/12] [text] Remove state from HumanNameParser, making it immutable

Posted by br...@apache.org.
Remove state from HumanNameParser, making it immutable


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/1f6c5dae
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/1f6c5dae
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/1f6c5dae

Branch: refs/heads/SANDBOX-498
Commit: 1f6c5daecded67a17c07371a564f74ef623b3f29
Parents: 685f9a8
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:28:37 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:28:37 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     | 141 +++----------------
 .../org/apache/commons/text/names/Name.java     |  32 +++++
 2 files changed, 51 insertions(+), 122 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/1f6c5dae/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index df8e55c..c47abde 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -61,135 +61,32 @@ import org.apache.commons.lang3.StringUtils;
  * <p>This implementation is based on the Java implementation, with additions
  * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
  *
- * <p>This class is not thread-safe.</p>
+ * <p>This class is immutable.</p>
  */
 public class HumanNameParser {
 
     /**
-     * Leading init part.
-     */
-    private String leadingInit;
-    /**
-     * First name.
-     */
-    private String first;
-    /**
-     * Single nickname found in the name input.
-     */
-    private String nickname;
-    /**
-     * Middle name.
-     */
-    private String middle;
-    /**
-     * Last name.
-     */
-    private String last;
-    /**
-     * Name suffix.
-     */
-    private String suffix;
-    /**
      * Suffixes found.
      */
-    private List<String> suffixes;
+    private final List<String> suffixes;
     /**
      * Prefixes found.
      */
-    private List<String> prefixes;
+    private final List<String> prefixes;
 
     /**
      * Creates a parser given a string name.
      */
     public HumanNameParser() {
-        this.leadingInit = "";
-        this.first = "";
-        this.nickname = "";
-        this.middle = "";
-        this.last = "";
-        this.suffix = "";
-
-        this.suffixes = Arrays.asList(new String[]{
+        // TODO make this configurable
+        this.suffixes = Arrays.asList(
                 "esq", "esquire", "jr",
-                "sr", "2", "ii", "iii", "iv"});
-        this.prefixes = Arrays
-            .asList(new String[] {
+                "sr", "2", "ii", "iii", "iv");
+        this.prefixes = Arrays.asList(
                     "bar", "ben", "bin", "da", "dal",
                     "de la", "de", "del", "der", "di", "ibn", "la", "le",
                     "san", "st", "ste", "van", "van der", "van den", "vel",
-                    "von" });
-    }
-
-    /**
-     * Gets the leading init part of the name.
-     *
-     * @return the leading init part of the name
-     */
-    public String getLeadingInit() {
-        return leadingInit;
-    }
-
-    /**
-     * Gets the first name.
-     *
-     * @return first name
-     */
-    public String getFirst() {
-        return first;
-    }
-
-    /**
-     * Gets the nickname.
-     *
-     * @return the nickname
-     */
-    public String getNickname() {
-        return nickname;
-    }
-
-    /**
-     * Gets the middle name.
-     *
-     * @return the middle name
-     */
-    public String getMiddle() {
-        return middle;
-    }
-
-    /**
-     * Gets the last name.
-     *
-     * @return the last name
-     */
-    public String getLast() {
-        return last;
-    }
-
-    /**
-     * Gets the suffix part of the name.
-     *
-     * @return the name suffix
-     */
-    public String getSuffix() {
-        return suffix;
-    }
-
-    /**
-     * Gets the name suffixes.
-     *
-     * @return the name suffixes
-     */
-    public List<String> getSuffixes() {
-        return suffixes;
-    }
-
-    /**
-     * Gets the name prefixes.
-     *
-     * @return the name prefixes
-     */
-    public List<String> getPrefixes() {
-        return prefixes;
+                    "von" );
     }
 
     /**
@@ -218,28 +115,28 @@ public class HumanNameParser {
         String firstRegex = "(?i)^([^ ]+)";
 
         // get nickname, if there is one
-        this.nickname = nameString.chopWithRegex(nicknamesRegex, 2);
+        String nickname = nameString.chopWithRegex(nicknamesRegex, 2);
 
         // get suffix, if there is one
-        this.suffix = nameString.chopWithRegex(suffixRegex, 1);
+        String suffix = nameString.chopWithRegex(suffixRegex, 1);
 
-        // flip the before-comma and after-comma parts of the nameString
+        // flip the before-comma and after-comma parts of the name
         nameString.flip(",");
 
-        // get the last nameString
-        this.last = nameString.chopWithRegex(lastRegex, 0);
+        // get the last name
+        String last = nameString.chopWithRegex(lastRegex, 0);
 
         // get the first initial, if there is one
-        this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
+        String leadingInit = nameString.chopWithRegex(leadingInitRegex, 1);
 
-        // get the first nameString
-        this.first = nameString.chopWithRegex(firstRegex, 0);
-        if (StringUtils.isBlank(this.first)) {
+        // get the first name
+        String first = nameString.chopWithRegex(firstRegex, 0);
+        if (StringUtils.isBlank(first)) {
             throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
         }
 
-        // if anything's left, that's the middle nameString
-        this.middle = nameString.getStr();
+        // if anything's left, that's the middle name
+        String middle = nameString.getStr();
         
         return new Name(leadingInit, first, nickname, middle, last, suffix);
     }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/1f6c5dae/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
index 3067ba5..7e32de4 100644
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -41,26 +41,58 @@ public final class Name {
         this.suffix = suffix;
     }
 
+    // TODO Add an example to each getter
+
+    /**
+     * Gets the leading init part of the name.
+     *
+     * @return the leading init part of the name
+     */
     public String getLeadingInitial() {
         return leadingInitial;
     }
 
+    /**
+     * Gets the first name.
+     *
+     * @return first name
+     */
     public String getFirstName() {
         return firstName;
     }
 
+    /**
+     * Gets the nickname.
+     *
+     * @return the nickname
+     */
     public String getNickName() {
         return nickName;
     }
 
+    /**
+     * Gets the middle name.
+     *
+     * @return the middle name
+     */
     public String getMiddleName() {
         return middleName;
     }
 
+    /**
+     * Gets the last name.
+     *
+     * @return the last name
+     */
     public String getLastName() {
         return lastName;
     }
 
+    /**
+     * Gets the suffix part of the name.
+     *
+     * @return the name suffix
+     */
     public String getSuffix() {
         return suffix;
     }


[06/12] [text] Use a shared parser instance for tests

Posted by br...@apache.org.
Use a shared parser instance for tests


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/bbba0a32
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/bbba0a32
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/bbba0a32

Branch: refs/heads/SANDBOX-498
Commit: bbba0a327b7ad8873d176254ec2a550757911bda
Parents: 1f6c5da
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:30:00 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:30:00 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParserTest.java   | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/bbba0a32/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index d059ed4..314a949 100644
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@ -33,32 +33,33 @@ import org.junit.Test;
  */
 public class HumanNameParserTest {
 
-    private CSVParser parser;
+    private CSVParser inputParser;
+    private HumanNameParser nameParser;
 
     @Before
     public void setUp() throws Exception {
-        parser = CSVParser.parse(
+        inputParser = CSVParser.parse(
                 HumanNameParserTest.class.getResource("testNames.txt"), 
                 Charset.forName("UTF-8"), 
                 CSVFormat.DEFAULT.withDelimiter('|').withHeader());
+        nameParser = new HumanNameParser();
     }
 
     @After
     public void tearDown() throws Exception {
-        if (parser != null) {
-            parser.close();
+        if (inputParser != null) {
+            inputParser.close();
         }
     }
 
     @Test(expected = NullPointerException.class)
     public void shouldThrowNullPointerException_WhenNullIsParsed() throws Exception {
-        HumanNameParser parser = new HumanNameParser();
-        parser.parse(null);
+        nameParser.parse(null);
     }
 
     @Test
     public void testInputs() {
-        for (CSVRecord record : parser) {
+        for (CSVRecord record : inputParser) {
             validateRecord(record);
         }
     }
@@ -70,8 +71,7 @@ public class HumanNameParserTest {
      * @param record a CSVRecord representing one record in the input file.
      */
     private void validateRecord(CSVRecord record) {
-        HumanNameParser parser = new HumanNameParser();
-        Name result = parser.parse(record.get(Colums.Name));
+        Name result = nameParser.parse(record.get(Colums.Name));
 
         long recordNum = record.getRecordNumber();
         assertThat("Wrong LeadingInit in record " + recordNum,


[09/12] [text] Drop unused code from NameString and clean up NameStringTest

Posted by br...@apache.org.
Drop unused code from NameString and clean up NameStringTest


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/ed985cd5
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/ed985cd5
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/ed985cd5

Branch: refs/heads/SANDBOX-498
Commit: ed985cd51220e956f516acecf1039defd0141d34
Parents: 9e34064
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 16:44:32 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 16:44:32 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     |  5 +-
 .../apache/commons/text/names/NameString.java   | 24 ++-----
 .../commons/text/names/NameStringTest.java      | 67 ++++++--------------
 3 files changed, 30 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index a29e375..b5c0aa3 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -100,6 +100,7 @@ public final class HumanNameParser {
         Objects.requireNonNull(name, "Parameter 'name' must not be null.");
 
         NameString nameString = new NameString(name);
+        // TODO compile regexes only once when the parser is created
         String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
         String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 
@@ -132,11 +133,11 @@ public final class HumanNameParser {
         // get the first name
         String first = nameString.chopWithRegex(firstRegex, 0);
         if (StringUtils.isBlank(first)) {
-            throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'");
+            throw new NameParseException("Couldn't find a first name in '{" + nameString.getWrappedString() + "}'");
         }
 
         // if anything's left, that's the middle name
-        String middle = nameString.getStr();
+        String middle = nameString.getWrappedString();
         
         return new Name(leadingInit, first, nickname, middle, last, suffix);
     }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/main/java/org/apache/commons/text/names/NameString.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java
index 8f606f2..54e2753 100644
--- a/src/main/java/org/apache/commons/text/names/NameString.java
+++ b/src/main/java/org/apache/commons/text/names/NameString.java
@@ -37,30 +37,20 @@ final class NameString {
      *
      * @param str encapsulated string.
      */
-    public NameString(String str) {
+    NameString(String str) {
         this.str = str;
     }
 
     /**
-     * Gets the encapsulated string.
+     * Gets the wrapped string.
      *
-     * @return encapsulated string
+     * @return wrapped string
      */
-    public String getStr() {
+    String getWrappedString() {
         return str;
     }
 
     /**
-     * Sets the encapsulated string value.
-     *
-     * @param str string value
-     */
-    public void setStr(String str) {
-        this.str = str;
-        this.norm();
-    }
-
-    /**
      * Uses a regex to chop off and return part of the namestring.
      * There are two parts: first, it returns the matched substring,
      * and then it removes that substring from the encapsulated
@@ -70,7 +60,7 @@ final class NameString {
      * @param submatchIndex which of the parenthesized submatches to use
      * @return the part of the namestring that got chopped off
      */
-    public String chopWithRegex(String regex, int submatchIndex) {
+    String chopWithRegex(String regex, int submatchIndex) {
         String chopped = "";
         Pattern pattern = Pattern.compile(regex);
         Matcher matcher = pattern.matcher(this.str);
@@ -106,7 +96,7 @@ final class NameString {
      * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
      * @throws NameParseException if a regex fails or a condition is not expected
      */
-    public void flip(String flipAroundChar) {
+    void flip(String flipAroundChar) {
         String[] parts = this.str.split(flipAroundChar);
         if (parts != null) {
             if (parts.length == 2) {
@@ -125,7 +115,7 @@ final class NameString {
      * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
      * whitespace chars to " ".</p>
      */
-    public void norm() {
+    private void norm() {
         this.str = this.str.trim();
         this.str = this.str.replaceAll("\\s+", " ");
         this.str = this.str.replaceAll(",$", " ");

http://git-wip-us.apache.org/repos/asf/commons-text/blob/ed985cd5/src/test/java/org/apache/commons/text/names/NameStringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java
index 494c70b..e087579 100644
--- a/src/test/java/org/apache/commons/text/names/NameStringTest.java
+++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java
@@ -16,7 +16,8 @@
  */
 package org.apache.commons.text.names;
 
-import static org.junit.Assert.assertEquals;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertThat;
 
 import org.junit.Before;
 import org.junit.Test;
@@ -35,70 +36,42 @@ public class NameStringTest {
     }
 
     @Test
-    public void testSetStrRemovesWhitespaceAtEnds() {
-        nameString.setStr("    Björn O'Malley \r\n");
-        assertEquals(
-            "Björn O'Malley",
-            nameString.getStr()
-        );
-    }
+    public void testChopWithRegexReturnsChoppedSubstring() {
+        NameString nameString = new NameString("Björn O'Malley");
 
-    @Test
-    public void testSetStrRemovesRedudentantWhitespace(){
-        nameString.setStr(" Björn    O'Malley");
-        assertEquals(
-            "Björn O'Malley",
-            nameString.getStr()
-        );
-    }
-
-    @Test
-    public void testChopWithRegexReturnsChoppedSubstring(){
-        nameString.setStr("Björn O'Malley");
-        assertEquals(
-            "Björn",
-            nameString.chopWithRegex("(^([^ ]+))(.+)", 1)
-        );
+        assertThat(nameString.chopWithRegex("(^([^ ]+))(.+)", 1), equalTo("Björn"));
     }
 
     @Test
-    public void testChopWithRegexChopsStartOffNameStr(){
-        nameString.setStr("Björn O'Malley");
+    public void testChopWithRegexChopsStartOffNameStr() {
+        NameString nameString = new NameString("Björn O'Malley");
         nameString.chopWithRegex("(^[^ ]+)", 0);
-        assertEquals(
-                "O'Malley",
-            nameString.getStr()
-        );
+
+        assertThat(nameString.getWrappedString(), equalTo("O'Malley"));
     }
 
     @Test
-    public void testChopWithRegexChopsEndOffNameStr(){
-        nameString.setStr("Björn O'Malley");
+    public void testChopWithRegexChopsEndOffNameStr() {
+        NameString nameString = new NameString("Björn O'Malley");
         nameString.chopWithRegex("( (.+)$)", 1);
-        assertEquals(
-            "Björn",
-            nameString.getStr()
-        );
+
+        assertThat(nameString.getWrappedString(), equalTo("Björn"));
     }
 
     @Test
-    public void testChopWithRegexChopsMiddleFromNameStr(){
-        nameString.setStr("Björn 'Bill' O'Malley");
+    public void testChopWithRegexChopsMiddleFromNameStr() {
+        NameString nameString = new NameString("Björn 'Bill' O'Malley");
         nameString.chopWithRegex("( '[^']+' )", 0);
-        assertEquals(
-            "Björn O'Malley",
-            nameString.getStr()
-        );
+
+        assertThat(nameString.getWrappedString(), equalTo("Björn O'Malley"));
     }
 
     @Test
     public void testFlip() {
-        nameString.setStr("O'Malley, Björn");
+        NameString nameString = new NameString("O'Malley, Björn");
         nameString.flip(",");
-        assertEquals(
-            "Björn O'Malley",
-            nameString.getStr()
-        );
+
+        assertThat(nameString.getWrappedString(), equalTo("Björn O'Malley"));
     }
 
 }


[12/12] [text] Add SANDBOX-498 to the list of fixed issues

Posted by br...@apache.org.
Add SANDBOX-498 to the list of fixed issues


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/c1372c1f
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/c1372c1f
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/c1372c1f

Branch: refs/heads/SANDBOX-498
Commit: c1372c1f9754434995c9a91fe47508946ff5744f
Parents: 6d047a4
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 17:14:22 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 17:14:22 2015 +0200

----------------------------------------------------------------------
 src/changes/changes.xml | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/c1372c1f/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index fbb60b9..0a77677 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
   <body>
 
   <release version="1.0" date="tba" description="tba">
+    <action issue="SANDBOX-498" type="update" dev="britter">Improve HumanNameParser</action>
     <action issue="SANDBOX-497" type="fix" dev="kinow">IP clearance for the names package</action>
     <action issue="SANDBOX-496" type="add" dev="kinow">Write user guide</action>
     <action issue="SANDBOX-488" type="fix" dev="kinow">Work on the string metric, distance, and similarity definitions for the project</action>


[11/12] [text] Better JavaDoc for HumanNameParser

Posted by br...@apache.org.
Better JavaDoc for HumanNameParser


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6d047a46
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6d047a46
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6d047a46

Branch: refs/heads/SANDBOX-498
Commit: 6d047a461f83017c8b723f4b28c0ad10f3f1dc36
Parents: b1c7e56
Author: Benedikt Ritter <br...@apache.org>
Authored: Sun Apr 19 17:13:11 2015 +0200
Committer: Benedikt Ritter <br...@apache.org>
Committed: Sun Apr 19 17:13:11 2015 +0200

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     | 99 +++++++++++++-------
 1 file changed, 64 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d047a46/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
index b5c0aa3..5407d15 100644
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -24,58 +24,87 @@ import java.util.Objects;
 import org.apache.commons.lang3.StringUtils;
 
 /**
- * <p>A parser capable of parsing name parts out of a single string.</p>
+ * A parser capable of parsing name parts out of a single string.
  *
+ * <h3>Parsing examples</h3>
+ * 
  * <p>The code works by basically applying several Regexes in a certain order
- * and removing (chopping) tokens off the original string. The parser consumes
- * the tokens during its creation.</p>
+ * and removing (chopping) tokens off the original string. The parser creates
+ * a {@link Name} object representing the parse result. Note that passing null
+ * to the {@link #parse(String)} method will result in an exception.</p>
  *
- * <ul>
- * <li>J. Walter Weatherman </li>
- * <li>de la Cruz, Ana M.</li>
- * <li>James C. ('Jimmy') O'Dell, Jr.</li>
- * </ul>
- *
- * <p>and parses out the:</p>
- *
- * <ul>
- * <li>leading initial (Like "J." in "J. Walter Weatherman")</li>
- * <li>first name (or first initial in a name like 'R. Crumb')</li>
- * <li>nicknames (like "Jimmy" in "James C. ('Jimmy') O'Dell, Jr.")</li>
- * <li>middle names</li>
- * <li>last name (including compound ones like "van der Sar' and "Ortega y Gasset"), and</li>
- * <li>suffix (like 'Jr.', 'III')</li>
- * </ul>
+ * <table>
+ *  <tr>
+ *   <th>input</th>
+ *   <th>Leading initial</th>
+ *   <th>First name</th>
+ *   <th>Nick name</th>
+ *   <th>Middle name</th>
+ *   <th>Last Name</th>
+ *   <th>Suffix</th>
+ *  </tr>
+ *  <tr>
+ *   <td>J. Walter Weatherman</td>
+ *   <td>J.</td>
+ *   <td>Walter</td>
+ *   <td></td>
+ *   <td></td>
+ *   <td>Weatherman</td>
+ *   <td></td>
+ *  </tr>
+ *  <tr>
+ *   <td>de la Cruz, Ana M.</td>
+ *   <td></td>
+ *   <td>Ana</td>
+ *   <td></td>
+ *   <td>M.</td>
+ *   <td>de la Cruz</td>
+ *   <td></td>
+ *  </tr>
+ *  <tr>
+ *   <td>James C. ('Jimmy') O'Dell, Jr.</td>
+ *   <td></td>
+ *   <td>James</td>
+ *   <td>Jimmy</td>
+ *   <td>C.</td>
+ *   <td>O'Dell</td>
+ *   <td>Jr.</td>
+ *  </tr>
+ * </table>
  *
+ * <h3>Sample usage</h3>
+ * 
+ * <p>HumanNameParser instances are immutable and can be reused for parsing multiple names:</p>
+ * 
  * <pre>
- * Name name = new Name("Sérgio Vieira de Mello");
- * HumanNameParser parser = new HumanNameParser(name);
- * String firstName = parser.getFirst();
- * String nickname = parser.getNickname();
+ * HumanNameParser parser = new HumanNameParser();
+ * Name parsedName = parser.parse("Sérgio Vieira de Mello")
+ * String firstName = parsedName.getFirstName();
+ * String nickname = parsedName.getNickName();
  * // ...
+ * 
+ * Name nextName = parser.parse("James C. ('Jimmy') O'Dell, Jr.")
+ * String firstName = nextName.getFirstName();
+ * String nickname = nextName.getNickName();
  * </pre>
  *
+ * <h3>Further notes</h3>
+ * 
  * <p>The original code was written in <a href="http://jasonpriem.com/human-name-parse">PHP</a>
- * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>.</p>
- *
- * <p>This implementation is based on the Java implementation, with additions
- * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
+ * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>. This 
+ * implementation is based on the Java implementation, with additions
+ * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>
+ * and <a href="https://issues.apache.org/jira/browse/SANDBOX-498">SANDBOX-498</a>.</p>
  *
  * <p>This class is immutable.</p>
  */
 public final class HumanNameParser {
 
-    /**
-     * Suffixes found.
-     */
     private final List<String> suffixes;
-    /**
-     * Prefixes found.
-     */
     private final List<String> prefixes;
 
     /**
-     * Creates a parser given a string name.
+     * Creates a new parser.
      */
     public HumanNameParser() {
         // TODO make this configurable
@@ -90,7 +119,7 @@ public final class HumanNameParser {
     }
 
     /**
-     * Consumes the string and creates the name parts.
+     * Parses a name from the given string.
      *
      * @param name the name to parse. Must not be null.
      * @throws NameParseException if the parser fails to retrieve the name parts.