You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/05/29 23:27:27 UTC
svn commit: r780113 - in /hadoop/pig/trunk: ./
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
src/org/apache/pig/backend/hadoop/executionengine/util/
test/org/apache/pig/test/
Author: pradeepkth
Date: Fri May 29 21:27:27 2009
New Revision: 780113
URL: http://svn.apache.org/viewvc?rev=780113&view=rev
Log:
PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 29 21:27:27 2009
@@ -48,6 +48,8 @@
BUG FIXES
+PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)
+
PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan)
PIG-819: run -param -param; is a valid grunt command (milindb via olgan)
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri May 29 21:27:27 2009
@@ -383,8 +383,14 @@
String outputPath = st.getSFile().getFileName();
FuncSpec outputFuncSpec = st.getSFile().getFuncSpec();
FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
-
- jobConf.set("pig.storeFunc", outputFuncSpec.toString());
+
+ // serialize the store func spec using ObjectSerializer
+ // ObjectSerializer.serialize() uses default java serialization
+ // and then further encodes the output so that control characters
+ // get encoded as regular characters. Otherwise any control characters
+ // in the store funcspec would break the job.xml which is created by
+ // hadoop from the jobconf.
+ jobConf.set("pig.storeFunc", ObjectSerializer.serialize(outputFuncSpec.toString()));
jobConf.set(PIG_STORE_CONFIG,
ObjectSerializer.serialize(new StoreConfig(outputPath, st.getSchema())));
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java Fri May 29 21:27:27 2009
@@ -109,7 +109,13 @@
// PigOutputFormat will look for pig.storeFunc to actually
// write stuff out.
- outputConf.set("pig.storeFunc", sFile.getFuncSpec().toString());
+ // serialize the store func spec using ObjectSerializer
+ // ObjectSerializer.serialize() uses default java serialization
+ // and then further encodes the output so that control characters
+ // get encoded as regular characters. Otherwise any control characters
+ // in the store funcspec would break the job.xml which is created by
+ // hadoop from the jobconf.
+ outputConf.set("pig.storeFunc", ObjectSerializer.serialize(sFile.getFuncSpec().toString()));
// We set the output dir to the final location of the output,
// the output dir set in the original job config points to the
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Fri May 29 21:27:27 2009
@@ -34,18 +34,19 @@
*/
public static StoreFunc getStoreFunc(JobConf conf) throws ExecException {
StoreFunc store;
- String storeFunc = conf.get("pig.storeFunc", "");
- if (storeFunc.length() == 0) {
- store = new PigStorage();
- } else {
- try {
+ try {
+ String storeFunc = conf.get("pig.storeFunc", "");
+ if (storeFunc.length() == 0) {
+ store = new PigStorage();
+ } else {
+ storeFunc = (String) ObjectSerializer.deserialize(storeFunc);
store = (StoreFunc) PigContext
.instantiateFuncFromSpec(storeFunc);
- } catch (Exception e) {
- int errCode = 2081;
- String msg = "Unable to setup the store function.";
- throw new ExecException(msg, errCode, PigException.BUG, e);
}
+ } catch (Exception e) {
+ int errCode = 2081;
+ String msg = "Unable to setup the store function.";
+ throw new ExecException(msg, errCode, PigException.BUG, e);
}
return store;
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 29 21:27:27 2009
@@ -290,5 +290,33 @@
Util.deleteFile(cluster, "table_bs_ac_clxt");
}
+
+ @Test
+ public void testPigStorageWithCtrlChars() throws Exception {
+ String[] inputData = { "hello\u0001world", "good\u0001morning", "nice\u0001day" };
+ Util.createInputFile(cluster, "testPigStorageWithCtrlCharsInput.txt", inputData);
+ String script = "a = load 'testPigStorageWithCtrlCharsInput.txt' using PigStorage('\u0001');" +
+ "b = foreach a generate $0, CONCAT($0, '\u0005'), $1; " +
+ "store b into 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001');" +
+ "c = load 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001') as (f1:chararray, f2:chararray, f3:chararray);";
+ Util.registerQuery(pigServer, script);
+ Iterator<Tuple> it = pigServer.openIterator("c");
+ HashMap<String, Tuple> expectedResults = new HashMap<String, Tuple>();
+ expectedResults.put("hello", (Tuple) Util.getPigConstant("('hello','hello\u0005','world')"));
+ expectedResults.put("good", (Tuple) Util.getPigConstant("('good','good\u0005','morning')"));
+ expectedResults.put("nice", (Tuple) Util.getPigConstant("('nice','nice\u0005','day')"));
+ HashMap<String, Boolean> seen = new HashMap<String, Boolean>();
+ int numRows = 0;
+ while(it.hasNext()) {
+ Tuple t = it.next();
+ String firstCol = (String) t.get(0);
+ assertFalse(seen.containsKey(firstCol));
+ seen.put(firstCol, true);
+ assertEquals(expectedResults.get(firstCol), t);
+ numRows++;
+ }
+ assertEquals(3, numRows);
+
+ }
}