You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2016/05/31 23:00:46 UTC
svn commit: r1746360 - in /pig/branches/branch-0.15: CHANGES.txt
src/org/apache/pig/data/utils/SedesHelper.java
Author: rohini
Date: Tue May 31 23:00:46 2016
New Revision: 1746360
URL: http://svn.apache.org/viewvc?rev=1746360&view=rev
Log:
PIG-4821: Pig chararray field with special UTF-8 chars as part of tuple join key produces wrong results in Tez (rohini)
Modified:
pig/branches/branch-0.15/CHANGES.txt
pig/branches/branch-0.15/src/org/apache/pig/data/utils/SedesHelper.java
Modified: pig/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1746360&r1=1746359&r2=1746360&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Tue May 31 23:00:46 2016
@@ -28,6 +28,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4821: Pig chararray field with special UTF-8 chars as part of tuple join key produces wrong results in Tez (rohini)
+
PIG-4860: Loading data using OrcStorage() accepts only default FileSystem path (beriaanirudh via rohini)
PIG-4867: -stop_on_failure does not work with Tez (rohini)
Modified: pig/branches/branch-0.15/src/org/apache/pig/data/utils/SedesHelper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/data/utils/SedesHelper.java?rev=1746360&r1=1746359&r2=1746360&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/org/apache/pig/data/utils/SedesHelper.java (original)
+++ pig/branches/branch-0.15/src/org/apache/pig/data/utils/SedesHelper.java Tue May 31 23:00:46 2016
@@ -61,25 +61,25 @@ public class SedesHelper {
public static void writeChararray(DataOutput out, String s) throws IOException {
// a char can take up to 3 bytes in the modified utf8 encoding
// used by DataOutput.writeUTF, so use UNSIGNED_SHORT_MAX/3
- if (s.length() < BinInterSedes.UNSIGNED_SHORT_MAX / 3) {
+ byte[] utfBytes = s.getBytes(BinInterSedes.UTF8);
+ int length = utfBytes.length;
+ if (length < BinInterSedes.UNSIGNED_SHORT_MAX) {
out.writeByte(BinInterSedes.SMALLCHARARRAY);
- out.writeUTF(s);
+ out.writeShort(length);
} else {
- byte[] utfBytes = s.getBytes(BinInterSedes.UTF8);
- int length = utfBytes.length;
-
out.writeByte(BinInterSedes.CHARARRAY);
out.writeInt(length);
- out.write(utfBytes);
}
+ out.write(utfBytes);
}
public static String readChararray(DataInput in, byte type) throws IOException {
+ int size;
if (type == BinInterSedes.SMALLCHARARRAY) {
- return in.readUTF();
+ size = in.readUnsignedShort();
+ } else {
+ size = in.readInt();
}
-
- int size = in.readInt();
byte[] buf = new byte[size];
in.readFully(buf);
return new String(buf, BinInterSedes.UTF8);