You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@wayang.apache.org by be...@apache.org on 2022/04/08 17:16:49 UTC
[incubator-wayang] 28/32: [WAYANG-#8] Add Benchmark test of Python-Platform to SO
This is an automated email from the ASF dual-hosted git repository.
bertty pushed a commit to branch python-platform
in repository https://gitbox.apache.org/repos/asf/incubator-wayang.git
commit b63997f7fa768ae560d17dd8650f62f82d29ee7a
Author: Bertty Contreras-Rojas <be...@databloom.ai>
AuthorDate: Fri Apr 8 01:14:54 2022 +0200
[WAYANG-#8] Add Benchmark test of Python-Platform to SO
Signed-off-by: bertty <be...@apache.org>
---
python/bin/benchmark.sh | 31 ++++++++++
python/bin/test.sh | 4 +-
python/src/pywy/config.py | 15 +++--
.../pywy/tests/benchmark/python_benchmark_test.py | 68 +++++++++++++++++++---
.../pywy/tests/integration/python_platform_test.py | 4 +-
5 files changed, 106 insertions(+), 16 deletions(-)
diff --git a/python/bin/benchmark.sh b/python/bin/benchmark.sh
new file mode 100755
index 00000000..0e5ba138
--- /dev/null
+++ b/python/bin/benchmark.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Resolve the absolute path of the parent of this script's directory.
+# `cd` and `pwd` have to be chained with `&&` so they run in the same subshell;
+# with a pipe, `pwd` runs in its own process and prints the caller's directory.
+BASE=$(cd "$(dirname "$0")/.." && pwd)
+echo "$BASE"
+
+# Create the next file of a series by concatenating the previous file 10 times.
+#   $1: path template; every "##" is replaced by the number of the file
+#   $2: number of the already existing file that is used as the source
+# Nothing is generated when the destination already exists or when the source
+# file is missing; a message is printed in both cases.
+function create_next_file(){
+    # Keep every variable local so the caller's variables (e.g. its loop
+    # counter "i") are never overwritten.
+    local path=$1
+    local prev=$2
+    local curr=$((prev + 1))
+    local real_orig_path real_dest_path i
+    real_orig_path=$(echo "${path}" | sed "s/##/${prev}/g")
+    real_dest_path=$(echo "${path}" | sed "s/##/${curr}/g")
+    if [ -f "${real_dest_path}" ] ; then
+        echo "skiping the generation of ${real_dest_path}, because exist"
+        return
+    fi
+    if [ ! -f "${real_orig_path}" ] ; then
+        echo "it is not possible to generate the file ${real_dest_path}, because does not exist ${real_orig_path}"
+        return
+    fi
+    # The destination is quoted so a path containing spaces is not word-split.
+    touch "${real_dest_path}"
+    for i in {1..10} ; do
+        cat "${real_orig_path}" >> "${real_dest_path}"
+    done
+}
+
+# Starting from the existing 10e0MB.input file, every iteration creates a file
+# 10 times bigger than the previous one (10e1MB.input up to 10e5MB.input);
+# going by the file names this covers 1MB up to 100GB of text.
+for i in {0..4} ; do
+    create_next_file "${BASE}/src/pywy/tests/resources/10e##MB.input" "${i}"
+done
+# The directory is quoted so a BASE containing spaces stays a single argument.
+ls -lah "${BASE}/src/pywy/tests/resources/" | grep "10e"
+
diff --git a/python/bin/test.sh b/python/bin/test.sh
index 620e4622..57699767 100755
--- a/python/bin/test.sh
+++ b/python/bin/test.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-
-cd "$(dirname "$0")/.."
+# `cd ... && pwd` runs both commands in the same subshell, so BASE is the
+# absolute path of the parent of this script's directory; with a pipe, `pwd`
+# would report the caller's working directory instead.
+BASE=$(cd "$(dirname "$0")/.." && pwd)
+# Stop instead of running the test discovery from an unexpected directory.
+cd "${BASE}" || exit 1
python -m unittest discover -s ./src/ --pattern=*test.py
\ No newline at end of file
diff --git a/python/src/pywy/config.py b/python/src/pywy/config.py
index d7198100..6db0f3bb 100644
--- a/python/src/pywy/config.py
+++ b/python/src/pywy/config.py
@@ -6,14 +6,20 @@ logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
logger = logging.getLogger(__name__)
-def if_environ(key: str, default_value: str) -> str:
+# Return the value of the environment variable `key` unchanged when it is set;
+# otherwise return `default_value` converted to an absolute path.
+def if_environ_file(key: str, default_value: str) -> str:
return os.environ[key] if key in os.environ else os.path.abspath(default_value)
+def if_environ_int(key: str, default_value: int) -> int:
+    """Return the environment variable `key` parsed as an int.
+
+    `default_value` is returned as-is when the variable is not set.
+    """
+    raw_value = os.environ.get(key)
+    if raw_value is None:
+        return default_value
+    return int(raw_value)
+
+
BASE_DIR = pkg_resources.resource_filename("pywy", "")
-RC_DIR = if_environ("PYWY_RC_HOME", os.path.expanduser("~/.pywy"))
-RC_TEST_DIR = if_environ("PYWY_RC_TEST_HOME", "{}/tests/resources".format(BASE_DIR))
-#RC_TEST_OUT_DIR = if_environ("PYWY_RC_TEST_OUT_HOME", "{}/../../output".format(BASE_DIR))
+RC_DIR = if_environ_file("PYWY_RC_HOME", os.path.expanduser("~/.pywy"))
+RC_TEST_DIR = if_environ_file("PYWY_RC_TEST_HOME", "{}/tests/resources".format(BASE_DIR))
+# RC_TEST_OUT_DIR = if_environ_file("PYWY_RC_TEST_OUT_HOME", "{}/../../output".format(BASE_DIR))
+RC_BENCHMARK_SIZE = if_environ_int("PYWY_RC_BENCHMARK_SIZE", 2)
+
logger.info(" Environment variables")
logger.info(" ############################")
@@ -21,5 +27,6 @@ logger.info(f" ## {BASE_DIR=}")
logger.info(f" ## {RC_DIR=}")
logger.info(f" ## {RC_TEST_DIR=}")
#logger.info(f" ## {RC_TEST_OUT_DIR=}")
+logger.info(f" ## {RC_BENCHMARK_SIZE=}")
logger.info(" ############################")
# print(HOME_DIR)
diff --git a/python/src/pywy/tests/benchmark/python_benchmark_test.py b/python/src/pywy/tests/benchmark/python_benchmark_test.py
index 6538beaf..6a96f5fb 100644
--- a/python/src/pywy/tests/benchmark/python_benchmark_test.py
+++ b/python/src/pywy/tests/benchmark/python_benchmark_test.py
@@ -1,15 +1,65 @@
+import os
+import tempfile
import unittest
+import time
+import logging
+from typing import List
+from pywy.dataquanta import WayangContext
+from pywy.plugins import PYTHON
+from pywy.config import RC_TEST_DIR as ROOT
+from pywy.config import RC_BENCHMARK_SIZE
+
+logger = logging.getLogger(__name__)
+
+
+class TestBenchmarkPythonGrep(unittest.TestCase):
+ file_grep: List[str]
-class TestBenchmarkPython(unittest.TestCase):
def setUp(self):
- pass
+        """Keep the first RC_BENCHMARK_SIZE input files, in increasing exponent order."""
+        candidates = [
+            "{}/10e{}MB.input".format(ROOT, exponent)
+            for exponent in range(6)
+        ]
+        self.file_grep = candidates[:RC_BENCHMARK_SIZE]
+
+
+    @staticmethod
+    def grep_python(path):
+        """Time a filter for lines containing 'six' run on the Python platform.
+
+        The lines of `path` that contain 'six' are stored in a temporary file,
+        which is removed before returning.
+
+        Returns the pair (tic, toc) of `time.perf_counter()` readings taken
+        right before and right after the Wayang plan is executed.
+        """
+        def pre(a: str) -> bool:
+            return 'six' in a
+
+        fd, path_tmp = tempfile.mkstemp()
+        # mkstemp returns an already open descriptor; only the path is needed,
+        # so close it right away instead of leaking one descriptor per call.
+        os.close(fd)
+        try:
+            tic = time.perf_counter()
+            WayangContext() \
+                .register(PYTHON) \
+                .textfile(path) \
+                .filter(pre) \
+                .store_textfile(path_tmp)
+            toc = time.perf_counter()
+        finally:
+            # Remove the temporary output even when the plan raises.
+            os.remove(path_tmp)
+        return tic, toc
+
+    @staticmethod
+    def grep_so(path):
+        """Time the operating system `grep` looking for 'six' in `path`.
+
+        The matching lines are appended to a temporary file, which is removed
+        before returning.
+
+        Returns the pair (tic, toc) of `time.perf_counter()` readings taken
+        right before and right after the shell command runs.
+        """
+        import shlex
+
+        fd, path_tmp = tempfile.mkstemp()
+        # mkstemp returns an already open descriptor; only the path is needed,
+        # so close it right away instead of leaking one descriptor per call.
+        os.close(fd)
+        # Quote both paths: the command goes through the shell, and a path
+        # with spaces or metacharacters would otherwise be split or expanded.
+        command = 'grep "six" {} >> {}'.format(shlex.quote(path), shlex.quote(path_tmp))
+        try:
+            tic = time.perf_counter()
+            os.system(command)
+            toc = time.perf_counter()
+        finally:
+            # Remove the temporary output even when the command fails to start.
+            os.remove(path_tmp)
+        return tic, toc
- def test_TO_REMOVE(self):
- """
- TODO REMOVE THIS TEST, IT JUST TO VALIDATE THAT EVERYTHING IS CORRECT IN TERMS OF ENVIRONMENT
- Returns
- -------
- """
- self.assertEqual("a", "a")
+ def test_grep(self):
+ """Log, for every file in self.file_grep, the time taken by grep_python and by grep_so."""
+ # Nothing is asserted here: the elapsed times are only written to the log.
+ for path in self.file_grep:
+ (tic, toc) = self.grep_python(path)
+ logger.info(f"Python-Platform time for the file {path} {toc - tic:0.4f} seconds")
+ (tic, toc) = self.grep_so(path)
+ logger.info(f"Operative System time for the file {path} {toc - tic:0.4f} seconds")
diff --git a/python/src/pywy/tests/integration/python_platform_test.py b/python/src/pywy/tests/integration/python_platform_test.py
index 098c1f9c..64a7510d 100644
--- a/python/src/pywy/tests/integration/python_platform_test.py
+++ b/python/src/pywy/tests/integration/python_platform_test.py
@@ -1,3 +1,4 @@
+import os
import unittest
import tempfile
from os import fdopen
@@ -34,9 +35,10 @@ class TestIntegrationPythonPlatform(unittest.TestCase):
selectivity = len(list(lines_filter))
lines_platform: List[str]
- with fdopen(path_tmp, 'r') as fp:
+ with open(path_tmp, 'r') as fp:
lines_platform = fp.readlines()
elements = len(lines_platform)
+ os.remove(path_tmp)
self.assertEqual(selectivity, elements)
self.assertEqual(lines_filter, lines_platform)