You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/08/14 20:32:45 UTC

svn commit: r1618010 - in /pig/trunk: CHANGES.txt conf/pig.properties src/org/apache/pig/PigConfiguration.java src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java

Author: cheolsoo
Date: Thu Aug 14 18:32:44 2014
New Revision: 1618010

URL: http://svn.apache.org/r1618010
Log:
PIG-4124: Command for Python streaming udf should be configurable (cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/conf/pig.properties
    pig/trunk/src/org/apache/pig/PigConfiguration.java
    pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 14 18:32:44 2014
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-4124: Command for Python streaming udf should be configurable (cheolsoo)
+
 PIG-4114: Add Native operator to tez (daijy)
 
 PIG-4117: Implement merge cogroup in Tez (daijy)

Modified: pig/trunk/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/trunk/conf/pig.properties?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/conf/pig.properties (original)
+++ pig/trunk/conf/pig.properties Thu Aug 14 18:32:44 2014
@@ -565,3 +565,30 @@ hcat.bin=/usr/local/hcat/bin/hcat
 # you encounter some bug in automatic parallelism. If set to false, use 1 as
 # default parallelism
 pig.tez.auto.parallelism=true
+
+###########################################################################
+#
+# Streaming properties
+#
+
+# Define what properties will be set in the streaming environment. Just set this
+# property to a comma-delimited list of properties to set, and those properties
+# will be set in the environment.
+#
+# pig.streaming.environment=<comma-delimited list of properties>
+
+# Specify a comma-delimited list of local files to ship to the distributed cache
+# for streaming jobs.
+#
+# pig.streaming.ship.files=<comma-delimited list of local files>
+
+# Specify a comma-delimited list of remote files to cache on the distributed
+# cache for streaming jobs.
+#
+# pig.streaming.cache.files=<comma-delimited list of remote files>
+
+# Specify the python command to be used for Python streaming UDFs. By default,
+# python is used, but you can override it with a non-default version such as
+# python2.7.
+#
+# pig.streaming.udf.python.command=python

Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/trunk/src/org/apache/pig/PigConfiguration.java Thu Aug 14 18:32:44 2014
@@ -115,6 +115,12 @@ public class PigConfiguration {
     public static final String PIG_STREAMING_ENVIRONMENT = "pig.streaming.environment";
 
     /**
+     * This key can be used to configure the python command for Python streaming
+     * UDFs. For example, python2.7.
+     */
+    public static final String PIG_STREAMING_UDF_PYTHON_COMMAND = "pig.streaming.udf.python.command";
+
+    /**
      * This key is used to define the default load func. Pig will fallback on PigStorage
      * as default in case this is undefined.
      */

Modified: pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java?rev=1618010&r1=1618009&r2=1618010&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java (original)
+++ pig/trunk/src/org/apache/pig/scripting/streaming/python/PythonScriptEngine.java Thu Aug 14 18:32:44 2014
@@ -33,6 +33,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pig.ExecType;
 import org.apache.pig.FuncSpec;
+import org.apache.pig.PigConfiguration;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.scripting.ScriptEngine;
 import org.apache.pig.tools.pigstats.PigStats;
@@ -43,7 +44,9 @@ public class PythonScriptEngine extends 
     @Override
     public void registerFunctions(String path, String namespace,
             PigContext pigContext) throws IOException {
-        
+
+        String command = pigContext.getProperties().getProperty(
+                PigConfiguration.PIG_STREAMING_UDF_PYTHON_COMMAND, "python");
         String fileName = path.substring(0, path.length() - ".py".length());
         log.debug("Path: " + path + " FileName: " + fileName + " Namespace: " + namespace);
         File f = new File(path);
@@ -66,7 +69,7 @@ public class PythonScriptEngine extends 
             pigContext.registerFunction(alias, 
                                         new FuncSpec("StreamingUDF", 
                                                 new String[] {
-                                                    "python", 
+                                                    command, 
                                                     fileName, name, 
                                                     schemaString, schemaLineNumber,
                                                     execType, isIllustrate