You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2017/03/17 11:05:45 UTC

[08/14] [lang] LANG-1300: fixed CharSequenceUtils indexOf for Supplementary chars

LANG-1300: fixed CharSequenceUtils indexOf for supplementary characters


Project: http://git-wip-us.apache.org/repos/asf/commons-lang/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-lang/commit/12e597a7
Tree: http://git-wip-us.apache.org/repos/asf/commons-lang/tree/12e597a7
Diff: http://git-wip-us.apache.org/repos/asf/commons-lang/diff/12e597a7

Branch: refs/heads/master
Commit: 12e597a78cbf9d82414b944a5b285d81a2e27e51
Parents: 66f8569
Author: MarkDacek <ma...@richmond.edu>
Authored: Wed Mar 8 22:58:51 2017 -0500
Committer: MarkDacek <ma...@richmond.edu>
Committed: Wed Mar 8 22:58:51 2017 -0500

----------------------------------------------------------------------
 .../apache/commons/lang3/CharSequenceUtils.java | 37 +++++++++++---------
 .../lang3/StringUtilsEqualsIndexOfTest.java     |  2 ++
 2 files changed, 22 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-lang/blob/12e597a7/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
index 1d97e55..cc39202 100644
--- a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
+++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
@@ -84,17 +84,19 @@ public class CharSequenceUtils {
         }
         //supplementary characters (LANG1300)
         if (searchChar <= Character.MAX_CODE_POINT) {
+            int ind = 0;
             char[] chars = Character.toChars(searchChar);
-        	for (int i = start; i < sz; i++) {
-        		if (cs.charAt(i) == chars[0]) {
-        			if (i + 1 == sz) {
-        				break;
-        			}
-        			if (cs.charAt(i + 1) == chars[1]) {
-        				return i;
-        			}
-        		}
-        	}
+            for (int i = start; i < sz - 1; i++) {
+                char high = cs.charAt(i);
+                char low = cs.charAt(i + 1);
+                if (high == chars[0] && low == chars[1]) {
+                    return ind;
+                } else if (Character.isSurrogatePair(high, low)) {
+                    //skip over 1
+                    i++;
+                }
+                ind++;
+            }
         }
         return NOT_FOUND;
     }
@@ -148,17 +150,18 @@ public class CharSequenceUtils {
             }
         }
         //supplementary characters (LANG1300)
+        //NOTE - we must do a forward traversal for this to avoid duplicating code points
         if (searchChar <= Character.MAX_CODE_POINT) {
             char[] chars = Character.toChars(searchChar);
             for (int i = start; i >= 0; --i) {
                 if (cs.charAt(i) == chars[0]) {
-            	    if (i + 1 == cs.length()) {
-            		    break;
-            		}
-            		if (cs.charAt(i + 1) == chars[1]) {
-            		    return i;
-            		}
-            	}
+                    if (i + 1 == cs.length()) {
+                        break;
+                    }
+                    if (cs.charAt(i + 1) == chars[1]) {
+                        return i;
+                    }
+                }
             }
         }
         return NOT_FOUND;

http://git-wip-us.apache.org/repos/asf/commons-lang/blob/12e597a7/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
index 1c99fa4..8326061 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
@@ -300,6 +300,8 @@ public class StringUtilsEqualsIndexOfTest  {
     	StringBuilder builder = new StringBuilder();
     	builder.appendCodePoint(CODE_POINT);
     	assertEquals(0, StringUtils.indexOf(builder, CODE_POINT, 0));
+    	builder.appendCodePoint(CODE_POINT);
+    	assertEquals(1, StringUtils.indexOf(builder, CODE_POINT, 1));
     	//inner branch on the supplementary character block
     	char[] tmp = {(char) 55361};
     	builder = new StringBuilder();