You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by al...@apache.org on 2021/04/07 12:47:48 UTC

[asterixdb] 07/25: [ASTERIXDB-2855] Allow additions to Python UDF env

This is an automated email from the ASF dual-hosted git repository.

alsuliman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 1dea45828390e3116ba317bd5e39c7d4b055ae62
Author: Ian Maxon <ia...@maxons.email>
AuthorDate: Mon Mar 22 16:50:49 2021 -0700

    [ASTERIXDB-2855] Allow additions to Python UDF env
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    
    - Add a new configuration parameter to allow
      appending new environment variables to the
      Python interpreter process used in Python
      UDFs.
    
    - Add test to check that it works.
    
    - Skip adding empty args, path or env during
      setup of Python UDF commandline
    
    Change-Id: Ib6e1ee7debc9c2e07d24163542b1f98886792161
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10644
    Reviewed-by: Ian Maxon <im...@uci.edu>
    Reviewed-by: Dmitry Lychagin <dm...@couchbase.com>
    Reviewed-by: Till Westmann <ti...@apache.org>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
 .../src/test/resources/TweetSent/roundtrip.py      |  4 +++
 asterixdb/asterix-app/src/test/resources/cc.conf   |  1 +
 .../py_function_error.2.ddl.sqlpp                  |  5 +++-
 ...2.ddl.sqlpp => py_function_error.5.query.sqlpp} | 12 ++++----
 ...2.ddl.sqlpp => py_function_error.6.query.sqlpp} | 12 ++++----
 .../py_function_error/py_function_error.3.json     |  1 +
 .../py_function_error/py_function_error.4.json     |  1 +
 .../resources/runtimets/testsuite_it_python.xml    |  2 +-
 .../external/library/PythonLibraryEvaluator.java   | 17 +++++++----
 .../library/PythonLibraryEvaluatorFactory.java     | 34 +++++++++++++++++++---
 .../control/common/controllers/NCConfig.java       |  4 +++
 11 files changed, 71 insertions(+), 22 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
index 8b8fced..9058a01 100644
--- a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
+++ b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import math
+import os
 
 def sqrt(num):
     return math.sqrt(num)
@@ -26,3 +27,6 @@ class Tests(object):
 
     def warning(self):
         raise ArithmeticError("oof")
+
+    def env_test(self, key):
+        return os.environ[key]
diff --git a/asterixdb/asterix-app/src/test/resources/cc.conf b/asterixdb/asterix-app/src/test/resources/cc.conf
index e2cd5b9..9532849 100644
--- a/asterixdb/asterix-app/src/test/resources/cc.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc.conf
@@ -34,6 +34,7 @@ nc.api.port=19005
 [nc]
 credential.file=src/test/resources/security/passwd
 python.cmd.autolocate=true
+python.env=FOO=BAR=BAZ,BAR=BAZ
 address=127.0.0.1
 command=asterixnc
 app.class=org.apache.asterix.hyracks.bootstrap.NCApplication
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
index 0ad9fb3..74fe03f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
@@ -23,4 +23,7 @@ create function warning()
   as "roundtrip", "Tests.warning" at testlib;
 
 create function roundtrip(s)
-  as "roundtrip", "Tests.roundtrip" at testlib;
\ No newline at end of file
+  as "roundtrip", "Tests.roundtrip" at testlib;
+
+create function env_test(k)
+  as "roundtrip", "Tests.env_test" at testlib;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp
similarity index 82%
copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp
index 0ad9fb3..7334470 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp
@@ -16,11 +16,13 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+/*
+* Description  : Access a records nested records at each level.
+* Expected Res : Success
+* Date         : 04 Jun 2015
+*/
+// param max-warnings:json=0
 
 use test;
 
-create function warning()
-  as "roundtrip", "Tests.warning" at testlib;
-
-create function roundtrip(s)
-  as "roundtrip", "Tests.roundtrip" at testlib;
\ No newline at end of file
+env_test("FOO");
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp
similarity index 82%
copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp
index 0ad9fb3..092af06 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp
@@ -16,11 +16,13 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+/*
+* Description  : Access a records nested records at each level.
+* Expected Res : Success
+* Date         : 04 Jun 2015
+*/
+// param max-warnings:json=0
 
 use test;
 
-create function warning()
-  as "roundtrip", "Tests.warning" at testlib;
-
-create function roundtrip(s)
-  as "roundtrip", "Tests.roundtrip" at testlib;
\ No newline at end of file
+env_test("BAR");
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json
new file mode 100644
index 0000000..58de662
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json
@@ -0,0 +1 @@
+"BAR=BAZ"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json
new file mode 100644
index 0000000..f73df98
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json
@@ -0,0 +1 @@
+"BAZ"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
index 4e1d5b2..35bec85 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
@@ -57,7 +57,7 @@
     result[0].append(self.next_tuple(*arg, key=self.mid))
   File "entrypoint.py", line 99, in next_tuple
     return self.wrapped_fns[key](*args)
-  File "site-packages/roundtrip.py", line 28, in warning
+  File "site-packages/roundtrip.py", line 29, in warning
     raise ArithmeticError("oof")
 ArithmeticError: oof
  (in line 28, at column 1)</expected-warn>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
index e2229ee..457b86a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
@@ -27,6 +27,7 @@ import java.net.InetAddress;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.asterix.common.exceptions.AsterixException;
@@ -65,18 +66,21 @@ public class PythonLibraryEvaluator extends AbstractStateObject implements IDeal
     private IPCSystem ipcSys;
     private String sitePkgs;
     private List<String> pythonArgs;
+    private Map<String, String> pythonEnv;
     private TaskAttemptId task;
     private IWarningCollector warningCollector;
     private SourceLocation sourceLoc;
 
     public PythonLibraryEvaluator(JobId jobId, PythonLibraryEvaluatorId evaluatorId, ILibraryManager libMgr,
-            File pythonHome, String sitePkgs, List<String> pythonArgs, ExternalFunctionResultRouter router,
-            IPCSystem ipcSys, TaskAttemptId task, IWarningCollector warningCollector, SourceLocation sourceLoc) {
+            File pythonHome, String sitePkgs, List<String> pythonArgs, Map<String, String> pythonEnv,
+            ExternalFunctionResultRouter router, IPCSystem ipcSys, TaskAttemptId task,
+            IWarningCollector warningCollector, SourceLocation sourceLoc) {
         super(jobId, evaluatorId);
         this.libMgr = libMgr;
         this.pythonHome = pythonHome;
         this.sitePkgs = sitePkgs;
         this.pythonArgs = pythonArgs;
+        this.pythonEnv = pythonEnv;
         this.router = router;
         this.task = task;
         this.ipcSys = ipcSys;
@@ -98,8 +102,8 @@ public class PythonLibraryEvaluator extends AbstractStateObject implements IDeal
         args.add(InetAddress.getLoopbackAddress().getHostAddress());
         args.add(Integer.toString(port));
         args.add(sitePkgs);
-
         ProcessBuilder pb = new ProcessBuilder(args.toArray(new String[0]));
+        pb.environment().putAll(pythonEnv);
         pb.directory(new File(wd));
         p = pb.start();
         proto = new PythonIPCProto(p.getOutputStream(), router, p);
@@ -199,14 +203,15 @@ public class PythonLibraryEvaluator extends AbstractStateObject implements IDeal
 
     public static PythonLibraryEvaluator getInstance(IExternalFunctionInfo finfo, ILibraryManager libMgr,
             ExternalFunctionResultRouter router, IPCSystem ipcSys, File pythonHome, IHyracksTaskContext ctx,
-            String sitePkgs, List<String> pythonArgs, IWarningCollector warningCollector, SourceLocation sourceLoc)
-            throws IOException, AsterixException {
+            String sitePkgs, List<String> pythonArgs, Map<String, String> pythonEnv, IWarningCollector warningCollector,
+            SourceLocation sourceLoc) throws IOException, AsterixException {
         PythonLibraryEvaluatorId evaluatorId = new PythonLibraryEvaluatorId(finfo.getLibraryDataverseName(),
                 finfo.getLibraryName(), Thread.currentThread());
         PythonLibraryEvaluator evaluator = (PythonLibraryEvaluator) ctx.getStateObject(evaluatorId);
         if (evaluator == null) {
             evaluator = new PythonLibraryEvaluator(ctx.getJobletContext().getJobId(), evaluatorId, libMgr, pythonHome,
-                    sitePkgs, pythonArgs, router, ipcSys, ctx.getTaskAttemptId(), warningCollector, sourceLoc);
+                    sitePkgs, pythonArgs, pythonEnv, router, ipcSys, ctx.getTaskAttemptId(), warningCollector,
+                    sourceLoc);
             ctx.getJobletContext().registerDeallocatable(evaluator);
             evaluator.initialize();
             ctx.setStateObject(evaluator);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
index 86d51de..06c9bc9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
@@ -23,8 +23,9 @@ import static org.apache.asterix.external.library.PythonLibraryEvaluator.SITE_PA
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.asterix.common.api.INcApplicationContext;
 import org.apache.asterix.common.exceptions.AsterixException;
@@ -46,6 +47,7 @@ public class PythonLibraryEvaluatorFactory {
     private final ExternalFunctionResultRouter router;
     private final String sitePackagesPath;
     private final List<String> pythonArgs;
+    private final Map<String, String> pythonEnv;
 
     public PythonLibraryEvaluatorFactory(IHyracksTaskContext ctx) throws AsterixException {
         this.ctx = ctx;
@@ -67,17 +69,41 @@ public class PythonLibraryEvaluatorFactory {
                         + NCConfig.Option.PYTHON_CMD_AUTOLOCATE.ini() + " is false");
             }
         }
+        pythonEnv = new HashMap<>();
+        String[] envRaw = appCfg.getStringArray((NCConfig.Option.PYTHON_ENV));
+        if (envRaw != null) {
+            for (String rawEnvArg : envRaw) {
+                //TODO: i think equals is shared among all unixes and windows. but it needs verification
+                if (rawEnvArg.length() < 1) {
+                    continue;
+                }
+                String[] rawArgSplit = rawEnvArg.split("(?<!\\\\)=", 2);
+                if (rawArgSplit.length < 2) {
+                    throw AsterixException.create(ErrorCode.EXTERNAL_UDF_EXCEPTION,
+                            "Invalid environment variable format detected.");
+                }
+                pythonEnv.put(rawArgSplit[0], rawArgSplit[1]);
+            }
+        }
         pythonPath = new File(pythonPathCmd);
         List<String> sitePkgs = new ArrayList<>();
         sitePkgs.add(SITE_PACKAGES);
         String[] addlSitePackages = appCfg.getStringArray((NCConfig.Option.PYTHON_ADDITIONAL_PACKAGES));
-        sitePkgs.addAll(Arrays.asList(addlSitePackages));
+        for (String sitePkg : addlSitePackages) {
+            if (sitePkg.length() > 0) {
+                sitePkgs.add(sitePkg);
+            }
+        }
         if (appCfg.getBoolean(NCConfig.Option.PYTHON_USE_BUNDLED_MSGPACK)) {
             sitePkgs.add("ipc" + File.separator + SITE_PACKAGES + File.separator);
         }
         String[] pythonArgsRaw = appCfg.getStringArray(NCConfig.Option.PYTHON_ARGS);
         if (pythonArgsRaw != null) {
-            pythonArgs.addAll(Arrays.asList(pythonArgsRaw));
+            for (String arg : pythonArgsRaw) {
+                if (arg.length() > 0) {
+                    pythonArgs.add(arg);
+                }
+            }
         }
         StringBuilder sitePackagesPathBuilder = new StringBuilder();
         for (int i = 0; i < sitePkgs.size() - 1; i++) {
@@ -91,6 +117,6 @@ public class PythonLibraryEvaluatorFactory {
     public PythonLibraryEvaluator getEvaluator(IExternalFunctionInfo fnInfo, SourceLocation sourceLoc)
             throws IOException, AsterixException {
         return PythonLibraryEvaluator.getInstance(fnInfo, libraryManager, router, ipcSys, pythonPath, ctx,
-                sitePackagesPath, pythonArgs, ctx.getWarningCollector(), sourceLoc);
+                sitePackagesPath, pythonArgs, pythonEnv, ctx.getWarningCollector(), sourceLoc);
     }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 22b240a..01cb9bf 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -98,6 +98,7 @@ public class NCConfig extends ControllerConfig {
         PYTHON_ADDITIONAL_PACKAGES(STRING_ARRAY, new String[0]),
         PYTHON_USE_BUNDLED_MSGPACK(BOOLEAN, true),
         PYTHON_ARGS(STRING_ARRAY, (String[]) null),
+        PYTHON_ENV(STRING_ARRAY, (String[]) null),
         CREDENTIAL_FILE(
                 OptionTypes.STRING,
                 (Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -245,6 +246,8 @@ public class NCConfig extends ControllerConfig {
                     return "Python args to pass to Python interpreter";
                 case PYTHON_CMD_AUTOLOCATE:
                     return "Whether or not to attempt to automatically set PYTHON_CMD to a usable interpreter";
+                case PYTHON_ENV:
+                    return "List of environment variables to set when invoking the Python interpreter for Python UDFs. E.g. FOO=1";
                 case CREDENTIAL_FILE:
                     return "Path to HTTP basic credentials";
                 default:
@@ -621,4 +624,5 @@ public class NCConfig extends ControllerConfig {
     public String getCredentialFilePath() {
         return getAppConfig().getString(Option.CREDENTIAL_FILE);
     }
+
 }