You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2008/10/31 20:15:07 UTC
svn commit: r709537 - in /hadoop/pig/branches/types: CHANGES.txt
src/org/apache/pig/data/DataReaderWriter.java
test/org/apache/pig/test/TestEvalPipeline.java
test/org/apache/pig/test/Util.java
Author: olga
Date: Fri Oct 31 12:15:07 2008
New Revision: 709537
URL: http://svn.apache.org/viewvc?rev=709537&view=rev
Log:
PIG-497: UTF8 handling in BinStorage
Modified:
hadoop/pig/branches/types/CHANGES.txt
hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
hadoop/pig/branches/types/test/org/apache/pig/test/Util.java
Modified: hadoop/pig/branches/types/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/CHANGES.txt (original)
+++ hadoop/pig/branches/types/CHANGES.txt Fri Oct 31 12:15:07 2008
@@ -303,3 +303,6 @@
PIG-507: permission error not reported (pradeepk via olgan)
PIG-508: problem with double joins (pradeepk via olgan)
+
+ PIG-497: problems with UTF8 handling in BinStorage (pradeepk via olgan)
+
Modified: hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java Fri Oct 31 12:15:07 2008
@@ -105,12 +105,8 @@
return new DataByteArray(ba);
}
- case DataType.CHARARRAY: {
- int size = in.readInt();
- byte[] ba = new byte[size];
- in.readFully(ba);
- return new String(ba);
- }
+ case DataType.CHARARRAY:
+ return in.readUTF();
case DataType.NULL:
return null;
@@ -194,9 +190,7 @@
case DataType.CHARARRAY: {
out.writeByte(DataType.CHARARRAY);
- String s = (String)val;
- out.writeInt(s.length());
- out.writeBytes(s);
+ out.writeUTF((String)val);
break;
}
Modified: hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java Fri Oct 31 12:15:07 2008
@@ -693,4 +693,18 @@
assertEquals(output.second, t.get(2));
}
}
+
+ @Test
+ public void testUtf8Dump() throws IOException, ExecException {
+
+ // Create input file with unicode data
+ File input = Util.createInputFile("tmp", "",
+ new String[] {"wendyξ"});
+ pigServer.registerQuery("a = load 'file:" + Util.encodeEscape(input.toString()) + "' using PigStorage() " +
+ "as (name:chararray);");
+ Iterator<Tuple> it = pigServer.openIterator("a");
+ Tuple t = it.next();
+ assertEquals("wendyξ", t.get(0));
+
+ }
}
Modified: hadoop/pig/branches/types/test/org/apache/pig/test/Util.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/Util.java?rev=709537&r1=709536&r2=709537&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/Util.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/Util.java Fri Oct 31 12:15:07 2008
@@ -18,6 +18,8 @@
package org.apache.pig.test;
import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
@@ -166,7 +168,7 @@
throws IOException {
File f = File.createTempFile(tmpFilenamePrefix, tmpFilenameSuffix);
f.deleteOnExit();
- PrintWriter pw = new PrintWriter(f);
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
for (int i=0; i<inputData.length; i++){
pw.println(inputData[i]);
}