You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@liminal.apache.org by jb...@apache.org on 2020/07/20 06:25:18 UTC

[incubator-liminal] 38/43: Local mode improvements

This is an automated email from the ASF dual-hosted git repository.

jbonofre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-liminal.git

commit 07aad66928fb260d2dbb2ce821c2a5b1f864b55c
Author: assapin <47...@users.noreply.github.com>
AuthorDate: Mon Jun 22 15:03:53 2020 +0300

    Local mode improvements
---
 MANIFEST.in                                 |  21 +++++++++
 README.md                                   |  67 +++++++++++++++++++++++++---
 images/airflow_trigger.png                  | Bin 0 -> 148427 bytes
 images/k8s_running.png                      | Bin 0 -> 223001 bytes
 rainbow/runners/airflow/dag/rainbow_dags.py |   2 +-
 scripts/docker-compose.yml                  |   1 +
 scripts/package.sh                          |  19 +++-----
 setup.py                                    |  17 ++++---
 tests/runners/airflow/rainbow/rainbow.yml   |   2 +-
 9 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..04cdb6d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include scripts/*
+include requirements-airflow.txt
+recursive-include rainbow/build/ *
\ No newline at end of file
diff --git a/README.md b/README.md
index ee2f961..078343a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,22 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
 # Rainbow
 
 Rainbow is an end-to-end platform for data engineers & scientists, allowing them to build,
@@ -80,7 +99,7 @@ services:
 # Installation
 1. Install this package
 ```bash
-   pip install git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode
+   pip install liminal
 ```
 2. Optional: set RAINBOW_HOME to path of your choice (if not set, will default to ~/rainbow_home)
 ```bash
@@ -102,18 +121,54 @@ a requirements.txt in the root of your project.
 
 When your pipeline code is ready, you can test it by running it locally on your machine.
 
-1. Deploy the pipeline:
+1. Ensure you have the Docker engine running locally, and enable a local Kubernetes cluster:
+![Kubernetes configured](https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/images/k8s_running.png)
+
+If you want to execute your pipeline on a remote kubernetes cluster, make sure the cluster is configured
+using:
+```bash
+kubectl config set-context <your remote kubernetes cluster>
+``` 
+2. Build the docker images used by your pipeline.
+
+In the example pipeline above, you can see that tasks and services have an "image" field - such as 
+"my_static_input_task_image". This means that the task is executed inside a docker container, and the docker container 
+is created from a docker image where various code and libraries are installed.
+
+You can take a look at what the build process looks like, e.g. 
+[here](https://github.com/Natural-Intelligence/rainbow/tree/master/rainbow/build/image/python)
+
+In order for the images to be available for your pipeline, you'll need to build them locally:
+
+```bash
+cd </path/to/your/rainbow/code>
+rainbow build
+```
+
+You'll see a number of outputs indicating that various docker images were built.
+
+3. Deploy the pipeline:
 ```bash
 cd </path/to/your/rainbow/code> 
 rainbow deploy
 ```
-2. Make sure you have docker running
-3. Start the Server
+
+4. Start the server
 ```bash
 rainbow start
 ```
-4. Navigate to [http://localhost:8080/admin]
-5. You should see your ![pipeline](https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/images/airflow.png")
+
+5. Navigate to [http://localhost:8080/admin](http://localhost:8080/admin)
+
+6. You should see your ![pipeline](https://raw.githubusercontent.com/Natural-Intelligence/rainbow/master/images/airflow.png)
+The pipeline is scheduled to run according to the `schedule: 0 * 1 * *` field in the .yml file you provided.
+
+7. To manually activate your pipeline:
+Click your pipeline and then click "trigger DAG"
+Click "Graph view"
+You should see the steps in your pipeline getting executed in "real time" by clicking "Refresh" periodically.
+
+![Pipeline activation](https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/images/airflow_trigger.png)
 
 ### Running Tests (for contributors)
 When doing local development and running Rainbow unit-tests, make sure to set RAINBOW_STAND_ALONE_MODE=True
diff --git a/images/airflow_trigger.png b/images/airflow_trigger.png
new file mode 100644
index 0000000..22168e8
Binary files /dev/null and b/images/airflow_trigger.png differ
diff --git a/images/k8s_running.png b/images/k8s_running.png
new file mode 100644
index 0000000..8bf8f3b
Binary files /dev/null and b/images/k8s_running.png differ
diff --git a/rainbow/runners/airflow/dag/rainbow_dags.py b/rainbow/runners/airflow/dag/rainbow_dags.py
index 730fd03..9deef8e 100644
--- a/rainbow/runners/airflow/dag/rainbow_dags.py
+++ b/rainbow/runners/airflow/dag/rainbow_dags.py
@@ -35,7 +35,7 @@ __DEPENDS_ON_PAST = 'depends_on_past'
 
 def register_dags(configs_path):
     """
-    TODO: doc for register_dags
+    Registers pipelines in rainbow yml files found in given path (recursively) as airflow DAGs.
     """
     print(f'Registering DAG from path: {configs_path}')
     config_files = files_util.find_config_files(configs_path)
diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml
index b6a2dc3..d0304e5 100644
--- a/scripts/docker-compose.yml
+++ b/scripts/docker-compose.yml
@@ -30,6 +30,7 @@
                     max-file: "3"
             volumes:
                 - ${RAINBOW_HOME}:/usr/local/airflow/dags
+                - ${HOME}/.kube:/usr/local/airflow/.kube
             ports:
                 - "8080:8080"
             command: webserver
diff --git a/scripts/package.sh b/scripts/package.sh
index f4083e4..7824fd5 100755
--- a/scripts/package.sh
+++ b/scripts/package.sh
@@ -42,20 +42,14 @@ rsync -a --exclude 'venv' $(PWD)/ $docker_build_dir/zip_content/
 # perform installation of external packages (framework-requirements and user-requirements)
 # this is done inside a docker to 1) avoid requiring the user to install stuff, and 2) to create a platform-compatible
 # package (install the native libraries in a flavour suitable for the docker in which airflow runs, and not user machine)
-docker stop rainbow_build
-docker rm rainbow_build
-docker run --name rainbow_build -v /private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \
-       puckel/docker-airflow:1.10.9 /bin/bash -c "apt-get update && apt-get install -y wget && apt-get install -y git &&
-       cd /home/rainbow/tmp/zip_content &&
-       wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/rainbow/runners/airflow/dag/rainbow_dags.py &&
-       wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/requirements-airflow.txt &&
-       wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/scripts/docker-compose.yml &&
-       pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode &&
+
+docker run --rm --name rainbow_build -v /private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \
+       puckel/docker-airflow:1.10.9 /bin/bash -c "cd /home/rainbow/tmp/zip_content &&
+       pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" liminal==0.0.2dev5 &&
+       rsync -avzh --ignore-errors /home/rainbow/tmp/zip_content/liminal-resources/* /home/rainbow/tmp/zip_content/
        pip install --target=\"/home/rainbow/tmp/zip_content\" -r /home/rainbow/tmp/zip_content/requirements-airflow.txt &&
        pip install --target=\"/home/rainbow/tmp/zip_content\" -r /home/rainbow/tmp/zip_content/requirements.txt"
 
-docker stop rainbow_build
-docker rm rainbow_build
 
 # zip the content per https://airflow.apache.org/docs/stable/concepts.html#packaged-dags
 cd $docker_build_dir/zip_content
@@ -64,6 +58,3 @@ rm __init__.py
 
 zip -r ../dags/rainbows.zip .
 cp ../dags/rainbows.zip $target_path
-
-
-
diff --git a/setup.py b/setup.py
index c102ae3..2a0fdbb 100644
--- a/setup.py
+++ b/setup.py
@@ -17,9 +17,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import os
 
 import setuptools
-from setuptools import setup
 
 with open("README.md", "r") as fh:
     long_description = fh.read()
@@ -29,9 +29,9 @@ with open('requirements.txt') as f:
     print(requirements)
 
 setuptools.setup(
-    name="rainbow",
-    version="0.0.1",
-    author="Rainbow team",
+    name="liminal",
+    version=os.environ["LIMINAL_BUILD_VERSION"],
+    author="liminal team",
     description="A package for authoring and deploying machine learning workflows",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -39,10 +39,15 @@ setuptools.setup(
     packages=setuptools.find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
-        "License :: Apache 2.0",
+        "License :: OSI Approved :: Apache Software License",
         "Operating System :: OS Independent",
     ],
+    license='Apache License, Version 2.0',
     python_requires='>=3.6',
     install_requires=requirements,
-    scripts=['scripts/rainbow', 'scripts/package.sh']
+    scripts=['scripts/rainbow', 'scripts/package.sh'],
+    include_package_data=True,
+    data_files=[('liminal-resources', ['scripts/docker-compose.yml',
+                                       'requirements-airflow.txt',
+                                       'rainbow/runners/airflow/dag/rainbow_dags.py'])]
 )
diff --git a/tests/runners/airflow/rainbow/rainbow.yml b/tests/runners/airflow/rainbow/rainbow.yml
index 77af37b..1d5da13 100644
--- a/tests/runners/airflow/rainbow/rainbow.yml
+++ b/tests/runners/airflow/rainbow/rainbow.yml
@@ -30,7 +30,7 @@ pipelines:
       key2: val2
     metrics:
       namespace: TestNamespace
-      backends: [ 'cloudwatch' ]
+      backends: [ ]
     tasks:
       - task: my_static_input_task
         type: python