You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2015/10/09 13:35:22 UTC

svn commit: r1707696 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/tez/ src/org/apache/pig/tools/pigstats/ src/org/apache/pig/tools/pigstats/mapreduce/ src/org/apache/pig/tools/pigstats/tez/ test/org/apache/pig/test/ test/org/apache/pig/tez/

Author: rohini
Date: Fri Oct  9 11:35:20 2015
New Revision: 1707696

URL: http://svn.apache.org/viewvc?rev=1707696&view=rev
Log:
PIG-4657: Compress pig.script before encoding (sandyridgeracer via rohini)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java
    pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
    pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java
    pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
    pig/trunk/test/org/apache/pig/test/TestPigStats.java
    pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java
    pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Oct  9 11:35:20 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-4657: Compress pig.script before encoding (sandyridgeracer via rohini)
+
 PIG-4670: Embedded Python scripts still parse line by line (rohini)
 
 PIG-4663: HBaseStorage should allow the MaxResultsPerColumnFamily limit to avoid memory or scan timeout issues (pmazak via rohini)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java Fri Oct  9 11:35:20 2015
@@ -110,8 +110,7 @@ public class TezJobCompiler {
                 log.info("Local resource: " + entry.getKey());
             }
             DAG tezDag = buildDAG(tezPlanNode, localResources);
-            String script = new String(Base64.decodeBase64(TezScriptState.get().getScript()));
-            tezDag.setDAGInfo(createDagInfo(script));
+            tezDag.setDAGInfo(createDagInfo(TezScriptState.get().getScript()));
             return new TezJob(tezConf, tezDag, localResources, tezPlan.getEstimatedTotalParallelism());
         } catch (Exception e) {
             int errCode = 2017;

Modified: pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java Fri Oct  9 11:35:20 2015
@@ -63,6 +63,7 @@ import org.apache.pig.impl.plan.DepthFir
 import org.apache.pig.impl.plan.OperatorPlan;
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.util.JarManager;
+import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.newplan.logical.relational.LOCogroup;
 import org.apache.pig.newplan.logical.relational.LOCogroup.GROUPTYPE;
 import org.apache.pig.newplan.logical.relational.LOCross;
@@ -165,7 +166,8 @@ public abstract class ScriptState {
 
     protected String id;
 
-    protected String script;
+    protected String serializedScript;
+    protected String truncatedScript;
     protected String commandLine;
     protected String fileName;
 
@@ -180,7 +182,8 @@ public abstract class ScriptState {
 
     protected ScriptState(String id) {
         this.id = id;
-        this.script = "";
+        this.serializedScript = "";
+        this.truncatedScript = "";
     }
 
     public static ScriptState get() {
@@ -272,7 +275,7 @@ public abstract class ScriptState {
         }
     }
 
-    public void setScript(File file) {
+    public void setScript(File file) throws IOException {
         BufferedReader reader = null;
         try {
             reader = new BufferedReader(new FileReader(file));
@@ -289,10 +292,18 @@ public abstract class ScriptState {
         }
     }
 
-    public void setScript(String script) {
+    public void setScript(String script) throws IOException {
         if (script == null)
             return;
 
+        //Retain the truncated script
+        setTruncatedScript(script);
+
+        //Serialize and encode the string.
+        this.serializedScript =  ObjectSerializer.serialize(script);
+    }
+
+    private void setTruncatedScript(String script) {
         // restrict the size of the script to be stored in job conf
         int maxScriptSize = 10240;
         if (pigContext != null) {
@@ -301,13 +312,10 @@ public abstract class ScriptState {
                 maxScriptSize = Integer.valueOf(prop);
             }
         }
-        script = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize)
+       
+        this.truncatedScript = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize)
                                                    : script;
 
-        // XML parser cann't handle certain characters, including
-        // the control character (&#1). Use Base64 encoding to
-        // get around this problem
-        this.script = new String(Base64.encodeBase64(script.getBytes()));
     }
 
     public void setScriptFeatures(LogicalPlan plan) {
@@ -372,11 +380,15 @@ public abstract class ScriptState {
         return (commandLine == null) ? "" : commandLine;
     }
 
+    public String getSerializedScript() {
+        return (serializedScript == null) ? "" : serializedScript;
+    }
+
     public String getScript() {
-        return (script == null) ? "" : script;
+        return (truncatedScript == null) ? "" : truncatedScript;
     }
 
-    protected void setScript(BufferedReader reader) {
+    protected void setScript(BufferedReader reader) throws IOException {
         StringBuilder sb = new StringBuilder();
         try {
             String line = reader.readLine();

Modified: pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java Fri Oct  9 11:35:20 2015
@@ -72,7 +72,7 @@ public class MRScriptState extends Scrip
         conf.set(PIG_PROPERTY.HADOOP_VERSION.toString(), getHadoopVersion());
         conf.set(PIG_PROPERTY.VERSION.toString(), getPigVersion());
         conf.set(PIG_PROPERTY.SCRIPT_ID.toString(), id);
-        conf.set(PIG_PROPERTY.SCRIPT.toString(), getScript());
+        conf.set(PIG_PROPERTY.SCRIPT.toString(), getSerializedScript());
         conf.set(PIG_PROPERTY.COMMAND_LINE.toString(), getCommandLine());
 
         try {

Modified: pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java Fri Oct  9 11:35:20 2015
@@ -107,7 +107,7 @@ public class TezScriptState extends Scri
         conf.set(PIG_PROPERTY.HADOOP_VERSION.toString(), getHadoopVersion());
         conf.set(PIG_PROPERTY.VERSION.toString(), getPigVersion());
         conf.set(PIG_PROPERTY.SCRIPT_ID.toString(), id);
-        conf.set(PIG_PROPERTY.SCRIPT.toString(), getScript());
+        conf.set(PIG_PROPERTY.SCRIPT.toString(), getSerializedScript());
         conf.set(PIG_PROPERTY.COMMAND_LINE.toString(), getCommandLine());
     }
 

Modified: pig/trunk/test/org/apache/pig/test/TestPigStats.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStats.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStats.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStats.java Fri Oct  9 11:35:20 2015
@@ -26,7 +26,6 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -36,6 +35,7 @@ import org.apache.pig.backend.executione
 import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
 import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.newplan.logical.relational.LogicalPlan;
 import org.apache.pig.tools.pigstats.PigStats;
 import org.junit.Ignore;
@@ -46,7 +46,7 @@ abstract public class TestPigStats  {
 
     protected static final Log LOG = LogFactory.getLog(TestPigStats.class);
 
-    abstract public void addSettingsToConf(Configuration conf, String scriptFileName);
+    abstract public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException;
 
     @Test
     public void testPigScriptInConf() throws Exception {
@@ -61,7 +61,7 @@ abstract public class TestPigStats  {
         addSettingsToConf(conf, "test.pig");
 
         String s = conf.get("pig.script");
-        String script = new String(Base64.decodeBase64(s.getBytes()));
+        String script = (String) ObjectSerializer.deserialize(s);
 
         String expected =
             "register /mydir/sath.jar\n" +
@@ -95,7 +95,7 @@ abstract public class TestPigStats  {
         addSettingsToConf(conf, "testScript.py");
 
         String s = conf.get("pig.script");
-        String actual = new String(Base64.decodeBase64(s.getBytes()));
+        String actual = (String) ObjectSerializer.deserialize(s);
 
         String expected =
             "#!/usr/bin/python\n" +

Modified: pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java Fri Oct  9 11:35:20 2015
@@ -73,7 +73,7 @@ public class TestPigStatsMR extends Test
     }
 
     @Override
-    public void addSettingsToConf(Configuration conf, String scriptFileName) {
+    public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException {
         MRScriptState ss = MRScriptState.get();
         ss.setScript(new File(scriptFileName));
         MapReduceOper mro = new MapReduceOper(new OperatorKey());

Modified: pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java (original)
+++ pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java Fri Oct  9 11:35:20 2015
@@ -20,6 +20,7 @@ package org.apache.pig.tez;
 import static org.junit.Assert.assertEquals;
 
 import java.io.File;
+import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.pig.backend.executionengine.ExecJob;
@@ -38,7 +39,7 @@ import org.apache.pig.tools.pigstats.tez
 
 public class TestPigStatsTez extends TestPigStats {
     @Override
-    public void addSettingsToConf(Configuration conf, String scriptFileName) {
+    public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException {
         TezScriptState ss = TezScriptState.get();
         ss.setScript(new File(scriptFileName));
         ss.addDAGSettingsToConf(conf);