Posted to commits@pig.apache.org by pr...@apache.org on 2009/05/29 23:27:27 UTC

svn commit: r780113 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/util/ test/org/apache/pig/test/

Author: pradeepkth
Date: Fri May 29 21:27:27 2009
New Revision: 780113

URL: http://svn.apache.org/viewvc?rev=780113&view=rev
Log:
PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 29 21:27:27 2009
@@ -48,6 +48,8 @@
 
 BUG FIXES
 
+PIG-816: PigStorage() does not accept Unicode characters in its constructor (pradeepkth)
+
 PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan)
 
 PIG-819: run -param -param; is a valid grunt command (milindb via olgan)

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri May 29 21:27:27 2009
@@ -383,8 +383,14 @@
                 String outputPath = st.getSFile().getFileName();
                 FuncSpec outputFuncSpec = st.getSFile().getFuncSpec();
                 FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
-             
-                jobConf.set("pig.storeFunc", outputFuncSpec.toString());
+                
+                // serialize the store func spec using ObjectSerializer
+                // ObjectSerializer.serialize() uses default java serialization
+                // and then further encodes the output so that control characters
+                // get encoded as regular characters. Otherwise any control characters
+                // in the store funcspec would break the job.xml which is created by
+                // hadoop from the jobconf.
+                jobConf.set("pig.storeFunc", ObjectSerializer.serialize(outputFuncSpec.toString()));
                 jobConf.set(PIG_STORE_CONFIG, 
                             ObjectSerializer.serialize(new StoreConfig(outputPath, st.getSchema())));
 

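For context on the comment in the hunk above: a store FuncSpec such as PigStorage('\u0001') carries literal control characters, and Hadoop writes every JobConf entry into job.xml, where such characters are not legal XML. The sketch below illustrates the general idea of Java-serializing the value and Base64-encoding the bytes so only plain ASCII ever reaches the config; it is a simplified stand-in for Pig's ObjectSerializer, and the class and method names (ConfSafeEncoding, encodeForConf, decodeFromConf) are illustrative only.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.ObjectInputStream;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;
    import java.util.Base64;

    public class ConfSafeEncoding {

        // Java-serialize the value and Base64-encode the bytes: the result is
        // plain ASCII, so control characters in the original spec can never
        // corrupt the XML job.xml that Hadoop generates from the JobConf.
        public static String encodeForConf(Serializable value) throws IOException {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            ObjectOutputStream out = new ObjectOutputStream(bytes);
            out.writeObject(value);
            out.close();
            return Base64.getEncoder().encodeToString(bytes.toByteArray());
        }

        // Reverse of encodeForConf: Base64-decode, then Java-deserialize.
        public static Object decodeFromConf(String encoded) throws IOException, ClassNotFoundException {
            byte[] raw = Base64.getDecoder().decode(encoded);
            ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(raw));
            return in.readObject();
        }

        public static void main(String[] args) throws Exception {
            String funcSpec = "PigStorage('\u0001')";   // delimiter is a control character
            String safe = encodeForConf(funcSpec);      // ASCII-only, safe for job.xml
            System.out.println(safe);
            System.out.println(decodeFromConf(safe).equals(funcSpec));  // true: round-trips intact
        }
    }

The same encoding step is applied in the MapReducePOStoreImpl hunk below, which sets the identical "pig.storeFunc" property on the per-store output conf.
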
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReducePOStoreImpl.java Fri May 29 21:27:27 2009
@@ -109,7 +109,13 @@
 
         // PigOutputFormat will look for pig.storeFunc to actually
         // write stuff out.
-        outputConf.set("pig.storeFunc", sFile.getFuncSpec().toString());
+        // serialize the store func spec using ObjectSerializer
+        // ObjectSerializer.serialize() uses default java serialization
+        // and then further encodes the output so that control characters
+        // get encoded as regular characters. Otherwise any control characters
+        // in the store funcspec would break the job.xml which is created by
+        // hadoop from the jobconf.
+        outputConf.set("pig.storeFunc", ObjectSerializer.serialize(sFile.getFuncSpec().toString()));
 
         // We set the output dir to the final location of the output,
         // the output dir set in the original job config points to the

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Fri May 29 21:27:27 2009
@@ -34,18 +34,19 @@
      */
     public static StoreFunc getStoreFunc(JobConf conf) throws ExecException {
         StoreFunc store;
-        String storeFunc = conf.get("pig.storeFunc", "");
-        if (storeFunc.length() == 0) {
-            store = new PigStorage();
-        } else {
-            try {
+        try {
+            String storeFunc = conf.get("pig.storeFunc", "");
+            if (storeFunc.length() == 0) {
+                store = new PigStorage();
+            } else {
+                storeFunc = (String) ObjectSerializer.deserialize(storeFunc);
                 store = (StoreFunc) PigContext
                         .instantiateFuncFromSpec(storeFunc);
-            } catch (Exception e) {
-                int errCode = 2081;
-                String msg = "Unable to setup the store function.";
-                throw new ExecException(msg, errCode, PigException.BUG, e);
             }
+        } catch (Exception e) {
+            int errCode = 2081;
+            String msg = "Unable to setup the store function.";
+            throw new ExecException(msg, errCode, PigException.BUG, e);
         }
         return store;
     }

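On the read side, the reworked getStoreFunc() above looks up the encoded value, falls back to PigStorage when nothing was set, deserializes it, and then instantiates the function, with the whole sequence inside one try so a decode failure is reported through the same ExecException (error 2081) as an instantiation failure. A minimal standalone sketch of that lookup-with-default pattern follows; the Map stand-in for JobConf and the Base64 decoding are illustrative assumptions, not the actual Pig or Hadoop APIs.

    import java.io.ByteArrayInputStream;
    import java.io.ObjectInputStream;
    import java.util.Base64;
    import java.util.Map;

    public class StoreFuncLookup {

        // Stand-in for MapRedUtil.getStoreFunc(): return the decoded store func
        // spec, defaulting when the key is absent and folding decode failures
        // into a single uniform exception, mirroring the widened try block in
        // the hunk above.
        public static String getStoreFuncSpec(Map<String, String> conf) {
            String encoded = conf.getOrDefault("pig.storeFunc", "");
            try {
                if (encoded.length() == 0) {
                    return "PigStorage()";            // default store function spec
                }
                byte[] raw = Base64.getDecoder().decode(encoded);
                ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(raw));
                return (String) in.readObject();      // control characters restored intact
            } catch (Exception e) {
                throw new RuntimeException("Unable to setup the store function.", e);
            }
        }
    }
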
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=780113&r1=780112&r2=780113&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 29 21:27:27 2009
@@ -290,5 +290,33 @@
 
         Util.deleteFile(cluster, "table_bs_ac_clxt");
     }
+    
+    @Test
+    public void testPigStorageWithCtrlChars() throws Exception {
+        String[] inputData = { "hello\u0001world", "good\u0001morning", "nice\u0001day" };
+        Util.createInputFile(cluster, "testPigStorageWithCtrlCharsInput.txt", inputData);
+        String script = "a = load 'testPigStorageWithCtrlCharsInput.txt' using PigStorage('\u0001');" +
+        		"b = foreach a generate $0, CONCAT($0, '\u0005'), $1; " +
+        		"store b into 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001');" +
+        		"c = load 'testPigStorageWithCtrlCharsOutput.txt' using PigStorage('\u0001') as (f1:chararray, f2:chararray, f3:chararray);";
+        Util.registerQuery(pigServer, script);
+        Iterator<Tuple> it  = pigServer.openIterator("c");
+        HashMap<String, Tuple> expectedResults = new HashMap<String, Tuple>();
+        expectedResults.put("hello", (Tuple) Util.getPigConstant("('hello','hello\u0005','world')"));
+        expectedResults.put("good", (Tuple) Util.getPigConstant("('good','good\u0005','morning')"));
+        expectedResults.put("nice", (Tuple) Util.getPigConstant("('nice','nice\u0005','day')"));
+        HashMap<String, Boolean> seen = new HashMap<String, Boolean>();
+        int numRows = 0;
+        while(it.hasNext()) {
+            Tuple t = it.next();
+            String firstCol = (String) t.get(0);
+            assertFalse(seen.containsKey(firstCol));
+            seen.put(firstCol, true);
+            assertEquals(expectedResults.get(firstCol), t);
+            numRows++;
+        }
+        assertEquals(3, numRows);
+        
+    }
 
 }