You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2015/10/09 13:35:22 UTC
svn commit: r1707696 - in /pig/trunk: ./
src/org/apache/pig/backend/hadoop/executionengine/tez/
src/org/apache/pig/tools/pigstats/
src/org/apache/pig/tools/pigstats/mapreduce/
src/org/apache/pig/tools/pigstats/tez/ test/org/apache/pig/test/
test/org/apache/pig/tez/
Author: rohini
Date: Fri Oct 9 11:35:20 2015
New Revision: 1707696
URL: http://svn.apache.org/viewvc?rev=1707696&view=rev
Log:
PIG-4657: Compress pig.script before encoding (sandyridgeracer via rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java
pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java
pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
pig/trunk/test/org/apache/pig/test/TestPigStats.java
pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java
pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Oct 9 11:35:20 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4657: Compress pig.script before encoding (sandyridgeracer via rohini)
+
PIG-4670: Embedded Python scripts still parse line by line (rohini)
PIG-4663: HBaseStorage should allow the MaxResultsPerColumnFamily limit to avoid memory or scan timeout issues (pmazak via rohini)
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java Fri Oct 9 11:35:20 2015
@@ -110,8 +110,7 @@ public class TezJobCompiler {
log.info("Local resource: " + entry.getKey());
}
DAG tezDag = buildDAG(tezPlanNode, localResources);
- String script = new String(Base64.decodeBase64(TezScriptState.get().getScript()));
- tezDag.setDAGInfo(createDagInfo(script));
+ tezDag.setDAGInfo(createDagInfo(TezScriptState.get().getScript()));
return new TezJob(tezConf, tezDag, localResources, tezPlan.getEstimatedTotalParallelism());
} catch (Exception e) {
int errCode = 2017;
Modified: pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java Fri Oct 9 11:35:20 2015
@@ -63,6 +63,7 @@ import org.apache.pig.impl.plan.DepthFir
import org.apache.pig.impl.plan.OperatorPlan;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.JarManager;
+import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOCogroup.GROUPTYPE;
import org.apache.pig.newplan.logical.relational.LOCross;
@@ -165,7 +166,8 @@ public abstract class ScriptState {
protected String id;
- protected String script;
+ protected String serializedScript;
+ protected String truncatedScript;
protected String commandLine;
protected String fileName;
@@ -180,7 +182,8 @@ public abstract class ScriptState {
protected ScriptState(String id) {
this.id = id;
- this.script = "";
+ this.serializedScript = "";
+ this.truncatedScript = "";
}
public static ScriptState get() {
@@ -272,7 +275,7 @@ public abstract class ScriptState {
}
}
- public void setScript(File file) {
+ public void setScript(File file) throws IOException {
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
@@ -289,10 +292,18 @@ public abstract class ScriptState {
}
}
- public void setScript(String script) {
+ public void setScript(String script) throws IOException {
if (script == null)
return;
+ //Retain the truncated script
+ setTruncatedScript(script);
+
+ //Serialize and encode the string.
+ this.serializedScript = ObjectSerializer.serialize(script);
+ }
+
+ private void setTruncatedScript(String script) {
// restrict the size of the script to be stored in job conf
int maxScriptSize = 10240;
if (pigContext != null) {
@@ -301,13 +312,10 @@ public abstract class ScriptState {
maxScriptSize = Integer.valueOf(prop);
}
}
- script = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize)
+
+ this.truncatedScript = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize)
: script;
- // XML parser cann't handle certain characters, including
- // the control character (). Use Base64 encoding to
- // get around this problem
- this.script = new String(Base64.encodeBase64(script.getBytes()));
}
public void setScriptFeatures(LogicalPlan plan) {
@@ -372,11 +380,15 @@ public abstract class ScriptState {
return (commandLine == null) ? "" : commandLine;
}
+ public String getSerializedScript() {
+ return (serializedScript == null) ? "" : serializedScript;
+ }
+
public String getScript() {
- return (script == null) ? "" : script;
+ return (truncatedScript == null) ? "" : truncatedScript;
}
- protected void setScript(BufferedReader reader) {
+ protected void setScript(BufferedReader reader) throws IOException {
StringBuilder sb = new StringBuilder();
try {
String line = reader.readLine();
Modified: pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRScriptState.java Fri Oct 9 11:35:20 2015
@@ -72,7 +72,7 @@ public class MRScriptState extends Scrip
conf.set(PIG_PROPERTY.HADOOP_VERSION.toString(), getHadoopVersion());
conf.set(PIG_PROPERTY.VERSION.toString(), getPigVersion());
conf.set(PIG_PROPERTY.SCRIPT_ID.toString(), id);
- conf.set(PIG_PROPERTY.SCRIPT.toString(), getScript());
+ conf.set(PIG_PROPERTY.SCRIPT.toString(), getSerializedScript());
conf.set(PIG_PROPERTY.COMMAND_LINE.toString(), getCommandLine());
try {
Modified: pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java Fri Oct 9 11:35:20 2015
@@ -107,7 +107,7 @@ public class TezScriptState extends Scri
conf.set(PIG_PROPERTY.HADOOP_VERSION.toString(), getHadoopVersion());
conf.set(PIG_PROPERTY.VERSION.toString(), getPigVersion());
conf.set(PIG_PROPERTY.SCRIPT_ID.toString(), id);
- conf.set(PIG_PROPERTY.SCRIPT.toString(), getScript());
+ conf.set(PIG_PROPERTY.SCRIPT.toString(), getSerializedScript());
conf.set(PIG_PROPERTY.COMMAND_LINE.toString(), getCommandLine());
}
Modified: pig/trunk/test/org/apache/pig/test/TestPigStats.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStats.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStats.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStats.java Fri Oct 9 11:35:20 2015
@@ -26,7 +26,6 @@ import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
-import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -36,6 +35,7 @@ import org.apache.pig.backend.executione
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.tools.pigstats.PigStats;
import org.junit.Ignore;
@@ -46,7 +46,7 @@ abstract public class TestPigStats {
protected static final Log LOG = LogFactory.getLog(TestPigStats.class);
- abstract public void addSettingsToConf(Configuration conf, String scriptFileName);
+ abstract public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException;
@Test
public void testPigScriptInConf() throws Exception {
@@ -61,7 +61,7 @@ abstract public class TestPigStats {
addSettingsToConf(conf, "test.pig");
String s = conf.get("pig.script");
- String script = new String(Base64.decodeBase64(s.getBytes()));
+ String script = (String) ObjectSerializer.deserialize(s);
String expected =
"register /mydir/sath.jar\n" +
@@ -95,7 +95,7 @@ abstract public class TestPigStats {
addSettingsToConf(conf, "testScript.py");
String s = conf.get("pig.script");
- String actual = new String(Base64.decodeBase64(s.getBytes()));
+ String actual = (String) ObjectSerializer.deserialize(s);
String expected =
"#!/usr/bin/python\n" +
Modified: pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStatsMR.java Fri Oct 9 11:35:20 2015
@@ -73,7 +73,7 @@ public class TestPigStatsMR extends Test
}
@Override
- public void addSettingsToConf(Configuration conf, String scriptFileName) {
+ public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException {
MRScriptState ss = MRScriptState.get();
ss.setScript(new File(scriptFileName));
MapReduceOper mro = new MapReduceOper(new OperatorKey());
Modified: pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java?rev=1707696&r1=1707695&r2=1707696&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java (original)
+++ pig/trunk/test/org/apache/pig/tez/TestPigStatsTez.java Fri Oct 9 11:35:20 2015
@@ -20,6 +20,7 @@ package org.apache.pig.tez;
import static org.junit.Assert.assertEquals;
import java.io.File;
+import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.backend.executionengine.ExecJob;
@@ -38,7 +39,7 @@ import org.apache.pig.tools.pigstats.tez
public class TestPigStatsTez extends TestPigStats {
@Override
- public void addSettingsToConf(Configuration conf, String scriptFileName) {
+ public void addSettingsToConf(Configuration conf, String scriptFileName) throws IOException {
TezScriptState ss = TezScriptState.get();
ss.setScript(new File(scriptFileName));
ss.addDAGSettingsToConf(conf);