You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2016/06/20 08:09:05 UTC
hadoop git commit: HADOOP-13192. org.apache.hadoop.util.LineReader
cannot handle multibyte delimiters correctly. Contributed by binde.
Repository: hadoop
Updated Branches:
refs/heads/trunk d0162f204 -> fc6b50cc5
HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte delimiters correctly. Contributed by binde.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/fc6b50cc
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/fc6b50cc
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/fc6b50cc
Branch: refs/heads/trunk
Commit: fc6b50cc574e144fd314dea6c11987c6a384bfa6
Parents: d0162f2
Author: Akira Ajisaka <aa...@apache.org>
Authored: Mon Jun 20 17:07:26 2016 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Mon Jun 20 17:07:26 2016 +0900
----------------------------------------------------------------------
.../java/org/apache/hadoop/util/LineReader.java | 5 +-
.../org/apache/hadoop/util/TestLineReader.java | 59 ++++++++++++--------
2 files changed, 41 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/fc6b50cc/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
index 153953d..e20a7c1 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
@@ -318,7 +318,10 @@ public class LineReader implements Closeable {
break;
}
} else if (delPosn != 0) {
- bufferPosn--;
+ bufferPosn -= delPosn;
+ if(bufferPosn < -1) {
+ bufferPosn = -1;
+ }
delPosn = 0;
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/fc6b50cc/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
index 9d909bc..52f8b9f 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
@@ -58,7 +58,7 @@ public class TestLineReader {
* Check Condition
* In the second key value pair, the value should contain
* "</" from currentToken and
- * "id>" from next token
+ * "id>" from next token
*/
Delimiter="</entity>";
@@ -80,20 +80,21 @@ public class TestLineReader {
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
int BufferSize=64 * 1024;
- int numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length();
+ int numberOfCharToFillTheBuffer =
+ BufferSize - CurrentBufferTailToken.length();
StringBuilder fillerString=new StringBuilder();
- for (int i=0;i<numberOfCharToFillTheBuffer;i++) {
+ for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
fillerString.append('a'); // char 'a' as a filler for the test string
}
TestData = fillerString + TestPartOfInput;
lineReader = new LineReader(
- new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
+ new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
line = new Text();
- lineReader.readLine(line);
- Assert.assertEquals(fillerString.toString(),line.toString());
+ lineReader.readLine(line);
+ Assert.assertEquals(fillerString.toString(), line.toString());
lineReader.readLine(line);
Assert.assertEquals(Expected, line.toString());
@@ -107,35 +108,49 @@ public class TestLineReader {
Delimiter = "record";
StringBuilder TestStringBuilder = new StringBuilder();
- TestStringBuilder.append(Delimiter+"Kerala ");
- TestStringBuilder.append(Delimiter+"Bangalore");
- TestStringBuilder.append(Delimiter+" North Korea");
- TestStringBuilder.append(Delimiter+Delimiter+
+ TestStringBuilder.append(Delimiter + "Kerala ");
+ TestStringBuilder.append(Delimiter + "Bangalore");
+ TestStringBuilder.append(Delimiter + " North Korea");
+ TestStringBuilder.append(Delimiter + Delimiter+
"Guantanamo");
- TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
+ TestStringBuilder.append(Delimiter + "ecord"
+ + "recor" + "core"); //~EOF with 're'
TestData=TestStringBuilder.toString();
lineReader = new LineReader(
- new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
-
- lineReader.readLine(line);
- Assert.assertEquals("",line.toString());
- lineReader.readLine(line);
- Assert.assertEquals("Kerala ",line.toString());
+ new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
+
+ lineReader.readLine(line);
+ Assert.assertEquals("", line.toString());
+ lineReader.readLine(line);
+ Assert.assertEquals("Kerala ", line.toString());
lineReader.readLine(line);
- Assert.assertEquals("Bangalore",line.toString());
+ Assert.assertEquals("Bangalore", line.toString());
lineReader.readLine(line);
- Assert.assertEquals(" North Korea",line.toString());
+ Assert.assertEquals(" North Korea", line.toString());
lineReader.readLine(line);
- Assert.assertEquals("",line.toString());
+ Assert.assertEquals("", line.toString());
lineReader.readLine(line);
- Assert.assertEquals("Guantanamo",line.toString());
+ Assert.assertEquals("Guantanamo", line.toString());
lineReader.readLine(line);
- Assert.assertEquals(("ecord"+"recor"+"core"),line.toString());
+ Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());
+
+ // Test 3
+ // Input "aaaabccc" split by delimiter "aaab": the leading "aaa" is only a
+ // partial match, so the reader must rewind and find the delimiter at index 1.
+ TestData = "aaaabccc";
+ Delimiter = "aaab";
+ lineReader = new LineReader(
+ new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
+
+ lineReader.readLine(line);
+ Assert.assertEquals("a", line.toString());
+ lineReader.readLine(line);
+ Assert.assertEquals("ccc", line.toString());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org