You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@wayang.apache.org by be...@apache.org on 2022/04/08 17:16:49 UTC

[incubator-wayang] 28/32: [WAYANG-#8] Add Benchmark test of Python-Platform to SO

This is an automated email from the ASF dual-hosted git repository.

bertty pushed a commit to branch python-platform
in repository https://gitbox.apache.org/repos/asf/incubator-wayang.git

commit b63997f7fa768ae560d17dd8650f62f82d29ee7a
Author: Bertty Contreras-Rojas <be...@databloom.ai>
AuthorDate: Fri Apr 8 01:14:54 2022 +0200

    [WAYANG-#8] Add Benchmark test of Python-Platform to SO
    
    Signed-off-by: bertty <be...@apache.org>
---
 python/bin/benchmark.sh                            | 31 ++++++++++
 python/bin/test.sh                                 |  4 +-
 python/src/pywy/config.py                          | 15 +++--
 .../pywy/tests/benchmark/python_benchmark_test.py  | 68 +++++++++++++++++++---
 .../pywy/tests/integration/python_platform_test.py |  4 +-
 5 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/python/bin/benchmark.sh b/python/bin/benchmark.sh
new file mode 100755
index 00000000..0e5ba138
--- /dev/null
+++ b/python/bin/benchmark.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Build the benchmark input files; BASE is the parent directory of this script's directory.
+BASE=$(cd "$(dirname "$0")/.." && pwd)
+echo "$BASE"
+
+function create_next_file(){
+  # $1: path template in which "##" marks the size exponent; $2: exponent of the source file.
+  # Writes the file for exponent $2 + 1 as ten copies of the source (">>" creates it).
+  local path="$1" prev="$2" curr real_orig_path real_dest_path i
+  curr=$((prev + 1))
+  real_orig_path=$(echo "${path}" | sed "s/##/${prev}/g")
+  real_dest_path=$(echo "${path}" | sed "s/##/${curr}/g")
+  if [ -f "${real_dest_path}" ] ; then
+    echo "skiping the generation of ${real_dest_path}, because exist"
+    return
+  fi
+  if [ ! -f "${real_orig_path}" ] ; then
+    echo "it is not possible to generate the file ${real_dest_path}, because does not exist ${real_orig_path}"
+    return
+  fi
+  for i in {1..10} ; do
+    cat "${real_orig_path}" >> "${real_dest_path}"
+  done
+}
+
+# Starting from the existing 1MB file (10e0MB.input), generate 10MB up to 100GB of text data
+for i in {0..4} ; do
+  create_next_file "${BASE}/src/pywy/tests/resources/10e##MB.input" "${i}"
+done
+ls -lah "${BASE}/src/pywy/tests/resources/" | grep "10e"
+
diff --git a/python/bin/test.sh b/python/bin/test.sh
index 620e4622..57699767 100755
--- a/python/bin/test.sh
+++ b/python/bin/test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-
-cd "$(dirname "$0")/.."
+BASE=$(cd "$(dirname "$0")/.." && pwd)  # "&&", not "|": pwd must run after the cd, in the same subshell
+cd "${BASE}" || exit 1
 
 python -m unittest discover -s ./src/ --pattern=*test.py
\ No newline at end of file
diff --git a/python/src/pywy/config.py b/python/src/pywy/config.py
index d7198100..6db0f3bb 100644
--- a/python/src/pywy/config.py
+++ b/python/src/pywy/config.py
@@ -6,14 +6,20 @@ logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
 logger = logging.getLogger(__name__)
 
 
-def if_environ(key: str, default_value: str) -> str:
+def if_environ_file(key: str, default_value: str) -> str:  # env value as-is, else absolute path of the default
     return os.environ[key] if key in os.environ else os.path.abspath(default_value)
 
 
+def if_environ_int(key: str, default_value: int) -> int:
+    """Return environment variable ``key`` parsed as an int, or ``default_value`` when unset."""
+    return int(os.environ[key]) if key in os.environ else default_value
+
+
 BASE_DIR = pkg_resources.resource_filename("pywy", "")
-RC_DIR = if_environ("PYWY_RC_HOME", os.path.expanduser("~/.pywy"))
-RC_TEST_DIR = if_environ("PYWY_RC_TEST_HOME", "{}/tests/resources".format(BASE_DIR))
-#RC_TEST_OUT_DIR = if_environ("PYWY_RC_TEST_OUT_HOME", "{}/../../output".format(BASE_DIR))
+RC_DIR = if_environ_file("PYWY_RC_HOME", os.path.expanduser("~/.pywy"))
+RC_TEST_DIR = if_environ_file("PYWY_RC_TEST_HOME", "{}/tests/resources".format(BASE_DIR))
+# RC_TEST_OUT_DIR = if_environ_file("PYWY_RC_TEST_OUT_HOME", "{}/../../output".format(BASE_DIR))
+RC_BENCHMARK_SIZE = if_environ_int("PYWY_RC_BENCHMARK_SIZE", 2)
 
 logger.info(" Environment variables")
 logger.info(" ############################")
@@ -21,5 +27,6 @@ logger.info(f" ## {BASE_DIR=}")
 logger.info(f" ## {RC_DIR=}")
 logger.info(f" ## {RC_TEST_DIR=}")
 #logger.info(f" ## {RC_TEST_OUT_DIR=}")
+logger.info(f" ## {RC_BENCHMARK_SIZE=}")
 logger.info(" ############################")
 # print(HOME_DIR)
diff --git a/python/src/pywy/tests/benchmark/python_benchmark_test.py b/python/src/pywy/tests/benchmark/python_benchmark_test.py
index 6538beaf..6a96f5fb 100644
--- a/python/src/pywy/tests/benchmark/python_benchmark_test.py
+++ b/python/src/pywy/tests/benchmark/python_benchmark_test.py
@@ -1,15 +1,65 @@
+import os
+import tempfile
 import unittest
+import time
+import logging
 
+from typing import List
+from pywy.dataquanta import WayangContext
+from pywy.plugins import PYTHON
+from pywy.config import RC_TEST_DIR as ROOT
+from pywy.config import RC_BENCHMARK_SIZE
+
+logger = logging.getLogger(__name__)
+
+
+class TestBenchmarkPythonGrep(unittest.TestCase):
+    """Time a grep for "six" on the Python platform against the system grep."""
+    # input files exercised by test_grep; filled in by setUp
+    file_grep: List[str]
 
-class TestBenchmarkPython(unittest.TestCase):
     def setUp(self):
-        pass
+        """Keep the first RC_BENCHMARK_SIZE files of the 10e<k>MB.input series (k = 0..5)."""
+        full_list = ["{}/10e{}MB.input".format(ROOT, exponent) for exponent in range(6)]
+        self.file_grep = full_list[:RC_BENCHMARK_SIZE]
+
+    @staticmethod
+    def grep_python(path):
+        """Run textfile -> filter on 'six' -> store_textfile; return the perf_counter pair."""
+        def pre(a: str) -> bool:
+            return 'six' in a
+
+        fd, path_tmp = tempfile.mkstemp()
+        os.close(fd)  # only the path is needed; mkstemp leaves the descriptor open
+        try:
+            tic = time.perf_counter()
+            WayangContext() \
+                .register(PYTHON) \
+                .textfile(path) \
+                .filter(pre) \
+                .store_textfile(path_tmp)
+            toc = time.perf_counter()
+        finally:
+            os.remove(path_tmp)  # remove the output even when the pipeline raises
+        return tic, toc
 
-    def test_TO_REMOVE(self):
-        """
-        TODO REMOVE THIS TEST, IT JUST TO VALIDATE THAT EVERYTHING IS CORRECT IN TERMS OF ENVIRONMENT
-        Returns
-        -------
+    @staticmethod
+    def grep_so(path):
+        """Run the system grep for "six" on path; return the perf_counter pair around it."""
+        fd, path_tmp = tempfile.mkstemp()
+        os.close(fd)  # only the path is needed; mkstemp leaves the descriptor open
+        try:
+            tic = time.perf_counter()
+            os.system('grep "six" {} >> {}'.format(path, path_tmp))
+            toc = time.perf_counter()
+        finally:
+            os.remove(path_tmp)  # remove the output even when the call raises
+        return tic, toc
 
-        """
-        self.assertEqual("a", "a")
+    def test_grep(self):
+        """Log, for every selected input file, the elapsed time of both greps."""
+        for path in self.file_grep:
+            (tic, toc) = self.grep_python(path)
+            logger.info(f"Python-Platform time for the file {path} {toc - tic:0.4f} seconds")
+            (tic, toc) = self.grep_so(path)
+            logger.info(f"Operative System time for the file {path} {toc - tic:0.4f} seconds")
diff --git a/python/src/pywy/tests/integration/python_platform_test.py b/python/src/pywy/tests/integration/python_platform_test.py
index 098c1f9c..64a7510d 100644
--- a/python/src/pywy/tests/integration/python_platform_test.py
+++ b/python/src/pywy/tests/integration/python_platform_test.py
@@ -1,3 +1,4 @@
+import os
 import unittest
 import tempfile
 from os import fdopen
@@ -34,9 +35,10 @@ class TestIntegrationPythonPlatform(unittest.TestCase):
             selectivity = len(list(lines_filter))
 
         lines_platform: List[str]
-        with fdopen(path_tmp, 'r') as fp:
+        with open(path_tmp, 'r') as fp:
             lines_platform = fp.readlines()
             elements = len(lines_platform)
+        os.remove(path_tmp)
 
         self.assertEqual(selectivity, elements)
         self.assertEqual(lines_filter, lines_platform)