You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/08/14 20:32:45 UTC
svn commit: r1618010 - in /pig/trunk: CHANGES.txt conf/pig.properties
src/org/apache/pig/PigConfiguration.java
src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java
Author: cheolsoo
Date: Thu Aug 14 18:32:44 2014
New Revision: 1618010
URL: http://svn.apache.org/r1618010
Log:
PIG-4124: Command for Python streaming udf should be configurable (cheolsoo)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/conf/pig.properties
pig/trunk/src/org/apache/pig/PigConfiguration.java
pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 14 18:32:44 2014
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4124: Command for Python streaming udf should be configurable (cheolsoo)
+
PIG-4114: Add Native operator to tez (daijy)
PIG-4117: Implement merge cogroup in Tez (daijy)
Modified: pig/trunk/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/trunk/conf/pig.properties?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/conf/pig.properties (original)
+++ pig/trunk/conf/pig.properties Thu Aug 14 18:32:44 2014
@@ -565,3 +565,30 @@ hcat.bin=/usr/local/hcat/bin/hcat
# you encounter some bug in automatic parallelism. If set to false, use 1 as
# default parallelism
pig.tez.auto.parallelism=true
+
+###########################################################################
+#
+# Streaming properties
+#
+
+# Define what properties will be set in the streaming environment. Just set this
+# property to a comma-delimited list of properties to set, and those properties
+# will be set in the environment.
+#
+# pig.streaming.environment=<comma-delimited list of properties>
+
+# Specify a comma-delimited list of local files to ship to distributed cache for
+# streaming job.
+#
+# pig.streaming.ship.files=<comma-delimited list of local files>
+
+# Specify a comma-delimited list of remote files to cache on distributed cache
+# for streaming job.
+#
+# pig.streaming.cache.files=<comma-delimited list of remote files>
+
+# Specify the python command to be used for python streaming udf. By default,
+# python is used, but you can override it with a non-default version such as
+# python2.7.
+#
+# pig.streaming.udf.python.command=python
Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/trunk/src/org/apache/pig/PigConfiguration.java Thu Aug 14 18:32:44 2014
@@ -115,6 +115,12 @@ public class PigConfiguration {
public static final String PIG_STREAMING_ENVIRONMENT = "pig.streaming.environment";
/**
+ * This key can be used to configure the python command for python streaming
+ * udf. For example, python2.7.
+ */
+ public static final String PIG_STREAMING_UDF_PYTHON_COMMAND = "pig.streaming.udf.python.command";
+
+ /**
* This key is used to define the default load func. Pig will fall back on PigStorage
* as default in case this is undefined.
*/
Modified: pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java (original)
+++ pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java Thu Aug 14 18:32:44 2014
@@ -33,6 +33,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.impl.PigContext;
import org.apache.pig.scripting.ScriptEngine;
import org.apache.pig.tools.pigstats.PigStats;
@@ -43,7 +44,9 @@ public class PythonScriptEngine extends
@Override
public void registerFunctions(String path, String namespace,
PigContext pigContext) throws IOException {
-
+
+ String command = pigContext.getProperties().getProperty(
+ PigConfiguration.PIG_STREAMING_UDF_PYTHON_COMMAND, "python");
String fileName = path.substring(0, path.length() - ".py".length());
log.debug("Path: " + path + " FileName: " + fileName + " Namespace: " + namespace);
File f = new File(path);
@@ -66,7 +69,7 @@ public class PythonScriptEngine extends
pigContext.registerFunction(alias,
new FuncSpec("StreamingUDF",
new String[] {
- "python",
+ command,
fileName, name,
schemaString, schemaLineNumber,
execType, isIllustrate