You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/07/24 09:37:47 UTC
[hadoop] branch trunk updated: HADOOP-17141. Add Capability To Get Text Length (#2157)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new e60096c HADOOP-17141. Add Capability To Get Text Length (#2157)
e60096c is described below
commit e60096c377d8a3cb5bed3992352779195be95bb4
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Fri Jul 24 05:37:28 2020 -0400
HADOOP-17141. Add Capability To Get Text Length (#2157)
Contributed by David Mollitor
---
.../src/main/java/org/apache/hadoop/io/Text.java | 18 ++++++++++++++++++
.../src/test/java/org/apache/hadoop/io/TestText.java | 10 ++++++++++
2 files changed, 28 insertions(+)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
index 716de3d..6022b99 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
@@ -77,6 +77,7 @@ public class Text extends BinaryComparable
private byte[] bytes = EMPTY_BYTES;
private int length = 0;
+ private int textLength = -1;
/**
* Construct an empty text string.
@@ -132,6 +133,17 @@ public class Text extends BinaryComparable
}
/**
+ * Returns the length of this text. The length is equal to the number of
+ * Unicode code units in the text.
+ */
+ public int getTextLength() {
+ if (textLength < 0) {
+ textLength = toString().length();
+ }
+ return textLength;
+ }
+
+ /**
* Returns the Unicode Scalar Value (32-bit integer value)
* for the character at <code>position</code>. Note that this
* method avoids using the converter or doing String instantiation
@@ -204,6 +216,7 @@ public class Text extends BinaryComparable
ByteBuffer bb = encode(string, true);
bytes = bb.array();
length = bb.limit();
+ textLength = string.length();
} catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened", e);
}
@@ -221,6 +234,7 @@ public class Text extends BinaryComparable
*/
public void set(Text other) {
set(other.getBytes(), 0, other.getLength());
+ this.textLength = other.textLength;
}
/**
@@ -234,6 +248,7 @@ public class Text extends BinaryComparable
ensureCapacity(len);
System.arraycopy(utf8, start, bytes, 0, len);
this.length = len;
+ this.textLength = -1;
}
/**
@@ -251,6 +266,7 @@ public class Text extends BinaryComparable
}
System.arraycopy(utf8, start, bytes, length, len);
length += len;
+ textLength = -1;
}
/**
@@ -263,6 +279,7 @@ public class Text extends BinaryComparable
*/
public void clear() {
length = 0;
+ textLength = -1;
}
/**
@@ -327,6 +344,7 @@ public class Text extends BinaryComparable
ensureCapacity(len);
in.readFully(bytes, 0, len);
length = len;
+ textLength = -1;
}
/**
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
index 54df399..700e106 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
@@ -268,6 +268,8 @@ public class TestText {
0, text.getBytes().length);
assertEquals("String's length must be zero",
0, text.getLength());
+ assertEquals("String's text length must be zero",
+ 0, text.getTextLength());
// Test if clear works as intended
text = new Text("abcd\u20acbdcd\u20ac");
@@ -280,6 +282,8 @@ public class TestText {
text.getBytes().length >= len);
assertEquals("Length of the string must be reset to 0 after clear()",
0, text.getLength());
+ assertEquals("Text length of the string must be reset to 0 after clear()",
+ 0, text.getTextLength());
}
@Test
@@ -288,9 +292,12 @@ public class TestText {
Text b=new Text("a");
b.set(a);
assertEquals("abc", b.toString());
+ assertEquals(3, a.getTextLength());
+ assertEquals(3, b.getTextLength());
a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString());
+ assertEquals("This should reflect in the lenght", 7, a.getTextLength());
// add an extra byte so that capacity = 10 and length = 8
a.append(new byte[]{'d'}, 0, 1);
assertEquals(10, a.getBytes().length);
@@ -392,16 +399,19 @@ public class TestText {
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 5);
assertEquals("hello", text.toString());
+ assertEquals(5, text.getTextLength());
// Read longer length, make sure it lengthens
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 7);
assertEquals("hello w", text.toString());
+ assertEquals(7, text.getTextLength());
// Read shorter length, make sure it shortens
in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 2);
assertEquals("he", text.toString());
+ assertEquals(2, text.getTextLength());
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org