You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2017/03/17 11:05:45 UTC
[08/14] [lang] LANG-1300: fixed CharSequenceUtils indexOf for Supplementary chars
LANG-1300: fixed CharSequenceUtils indexOf for Supplementary chars
Project: http://git-wip-us.apache.org/repos/asf/commons-lang/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-lang/commit/12e597a7
Tree: http://git-wip-us.apache.org/repos/asf/commons-lang/tree/12e597a7
Diff: http://git-wip-us.apache.org/repos/asf/commons-lang/diff/12e597a7
Branch: refs/heads/master
Commit: 12e597a78cbf9d82414b944a5b285d81a2e27e51
Parents: 66f8569
Author: MarkDacek <ma...@richmond.edu>
Authored: Wed Mar 8 22:58:51 2017 -0500
Committer: MarkDacek <ma...@richmond.edu>
Committed: Wed Mar 8 22:58:51 2017 -0500
----------------------------------------------------------------------
.../apache/commons/lang3/CharSequenceUtils.java | 37 +++++++++++---------
.../lang3/StringUtilsEqualsIndexOfTest.java | 2 ++
2 files changed, 22 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-lang/blob/12e597a7/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
index 1d97e55..cc39202 100644
--- a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
+++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
@@ -84,17 +84,19 @@ public class CharSequenceUtils {
}
//supplementary characters (LANG1300)
if (searchChar <= Character.MAX_CODE_POINT) {
+ int ind = 0;
char[] chars = Character.toChars(searchChar);
- for (int i = start; i < sz; i++) {
- if (cs.charAt(i) == chars[0]) {
- if (i + 1 == sz) {
- break;
- }
- if (cs.charAt(i + 1) == chars[1]) {
- return i;
- }
- }
- }
+ for (int i = start; i < sz - 1; i++) {
+ char high = cs.charAt(i);
+ char low = cs.charAt(i + 1);
+ if (high == chars[0] && low == chars[1]) {
+ return ind;
+ } else if (Character.isSurrogatePair(high, low)) {
+ //skip over 1
+ i++;
+ }
+ ind++;
+ }
}
return NOT_FOUND;
}
@@ -148,17 +150,18 @@ public class CharSequenceUtils {
}
}
//supplementary characters (LANG1300)
+ //NOTE - we must do a forward traversal for this to avoid duplicating code points
if (searchChar <= Character.MAX_CODE_POINT) {
char[] chars = Character.toChars(searchChar);
for (int i = start; i >= 0; --i) {
if (cs.charAt(i) == chars[0]) {
- if (i + 1 == cs.length()) {
- break;
- }
- if (cs.charAt(i + 1) == chars[1]) {
- return i;
- }
- }
+ if (i + 1 == cs.length()) {
+ break;
+ }
+ if (cs.charAt(i + 1) == chars[1]) {
+ return i;
+ }
+ }
}
}
return NOT_FOUND;
http://git-wip-us.apache.org/repos/asf/commons-lang/blob/12e597a7/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
index 1c99fa4..8326061 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
@@ -300,6 +300,8 @@ public class StringUtilsEqualsIndexOfTest {
StringBuilder builder = new StringBuilder();
builder.appendCodePoint(CODE_POINT);
assertEquals(0, StringUtils.indexOf(builder, CODE_POINT, 0));
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(1, StringUtils.indexOf(builder, CODE_POINT, 1));
//inner branch on the supplementary character block
char[] tmp = {(char) 55361};
builder = new StringBuilder();