You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2019/09/10 22:57:51 UTC
[incubator-iceberg] branch master updated: [python] Adding a string
to iceberg expression converter and tests (#426)
This is an automated email from the ASF dual-hosted git repository.
dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 776210e [python] Adding a string to iceberg expression converter and tests (#426)
776210e is described below
commit 776210e3e49591a47c5d3d93ecd6d5c8b22e19b5
Author: TGooch44 <te...@gmail.com>
AuthorDate: Tue Sep 10 15:57:46 2019 -0700
[python] Adding a string to iceberg expression converter and tests (#426)
---
python/iceberg/api/expressions/expressions.py | 88 ++++++++++++++++
python/setup.py | 2 +
python/tests/api/expressions/test_str_to_expr.py | 128 +++++++++++++++++++++++
3 files changed, 218 insertions(+)
diff --git a/python/iceberg/api/expressions/expressions.py b/python/iceberg/api/expressions/expressions.py
index 72bb562..190382d 100644
--- a/python/iceberg/api/expressions/expressions.py
+++ b/python/iceberg/api/expressions/expressions.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+import logging
+
from .expression import (And,
FALSE,
Not,
@@ -25,6 +27,8 @@ from .predicate import (Predicate,
UnboundPredicate)
from .reference import NamedReference
+_logger = logging.getLogger(__name__)
+
class Expressions(object):
@@ -122,6 +126,90 @@ class Expressions(object):
def ref(name):
return NamedReference(name)
@staticmethod
def convert_string_to_expr(predicate_string):
    """Convert a SQL-style predicate string into an iceberg expression.

    The predicate is embedded in a throwaway ``SELECT`` statement so that
    ``moz_sql_parser`` can parse it; only the ``where`` part of the parse
    result is used. That tree is rewritten (``between`` expanded, n-ary
    operators folded into pairs) and then evaluated through the
    ``Expressions`` factory methods.

    :param predicate_string: predicate text, e.g. ``"col_a=1 and col_b=2"``
    :return: whatever the mapped ``Expressions`` factory call returns for
        the root of the parsed predicate
    :raises pyparsing.ParseException: logged and re-raised unchanged when
        the parser rejects the statement
    """
    # Both parser packages are imported at call time rather than at module
    # import time.
    from moz_sql_parser import parse
    from pyparsing import ParseException

    # Operator names as they appear in the parse tree -> factory method.
    # Values are 1-tuples because _get_expr unwraps them before calling.
    expr_map = {"and": (Expressions.and_,),
                "eq": (Expressions.equal,),
                "exists": (Expressions.not_null,),
                "gt": (Expressions.greater_than,),
                "gte": (Expressions.greater_than_or_equal,),
                "lt": (Expressions.less_than,),
                "lte": (Expressions.less_than_or_equal,),
                "missing": (Expressions.is_null,),
                "neq": (Expressions.not_equal,),
                "not": (Expressions.not_,),
                "or": (Expressions.or_,)}

    # The statement is only handed to the parser, never executed.
    dummy_query = "SELECT * FROM tbl WHERE {}".format(predicate_string)  # nosec
    try:
        where_clause = parse(dummy_query)["where"]
        expr = Expressions._transform_to_binary_tuples(
            Expressions._transform_between_op(where_clause))
        return Expressions._get_expr(expr, expr_map)
    except ParseException as pe:
        # Pass the exception as a logging argument so formatting is deferred
        # to the logging framework instead of being done eagerly with "%".
        _logger.error("Error parsing string expression into iceberg expression: %s", pe)
        raise
+
+ @staticmethod
+ def _get_expr(node, expr_map):
+ if isinstance(node, dict):
+ for i in node.keys():
+ op = i
+ if op == "literal":
+ return node["literal"]
+ mapped_op = expr_map.get(op, expr_map)
+ if len(mapped_op) == 1:
+ mapped_op = mapped_op[0]
+ if mapped_op is None:
+ raise RuntimeError("no mapping for op: %s" % op)
+ if mapped_op in (Expressions.not_, Expressions.not_null, Expressions.is_null):
+ return mapped_op(Expressions._get_expr(node[op], expr_map))
+
+ return mapped_op(*Expressions._get_expr(node[op], expr_map))
+ elif isinstance(node, (list, tuple)):
+ return (Expressions._get_expr(item, expr_map) for item in node)
+ elif isinstance(node, (str, int, float)):
+ return node
+ else:
+ raise RuntimeError("unknown node type" % node)
+
+ @staticmethod
+ def _transform_to_binary_tuples(expr):
+ if not isinstance(expr, dict):
+ return expr
+ for op in expr.keys():
+ if op in ("exists", "literal", "missing", "not"):
+ return expr
+ new_expr = [Expressions._transform_to_binary_tuples(child)
+ for child in expr[op]]
+ while len(new_expr) > 2:
+ new_and = {op: [new_expr[-2], new_expr[-1]]}
+ new_expr[-2] = new_and
+ del new_expr[-1]
+ expr[op] = new_expr
+
+ return expr
+
+ @staticmethod
+ def _transform_between_op(expr):
+ if isinstance(expr, (bool, float, int, str)):
+ return expr
+ for op, children in expr.items():
+ if op in ("exists", "literal", "missing", "not"):
+ return expr
+ new_children = []
+ for child in children:
+ new_children.append(Expressions._transform_between_op(child))
+ expr[op] = new_children
+ if op == "between":
+ return {"and": [{"gte": [expr[op][0], expr[op][1]]},
+ {"lte": [expr[op][0], expr[op][2]]}]}
+ else:
+ return expr
+
class ExpressionVisitors(object):
diff --git a/python/setup.py b/python/setup.py
index 20f01f2..4ee888b 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -30,6 +30,8 @@ setup(
'boto3',
'fastavro',
'mmh3',
+ 'moz_sql_parser',
+ 'pyparsing',
'python-dateutil',
'pytz',
'requests',
diff --git a/python/tests/api/expressions/test_str_to_expr.py b/python/tests/api/expressions/test_str_to_expr.py
new file mode 100644
index 0000000..d44d52e
--- /dev/null
+++ b/python/tests/api/expressions/test_str_to_expr.py
@@ -0,0 +1,128 @@
+from iceberg.api.expressions import Expressions
+
+
def test_equal():
    """``col_a=1`` converts to ``Expressions.equal("col_a", 1)``."""
    assert Expressions.equal("col_a", 1) == Expressions.convert_string_to_expr("col_a=1")
+
+
def test_equal_alt_syntax():
    """The ``==`` spelling converts to the same equality predicate."""
    assert Expressions.equal("col_a", 1) == Expressions.convert_string_to_expr("col_a==1")
+
+
def test_gt():
    """``col_a > 1`` converts to ``Expressions.greater_than("col_a", 1)``."""
    assert Expressions.greater_than("col_a", 1) == Expressions.convert_string_to_expr("col_a > 1")
+
+
def test_gte():
    """``col_a >= 1`` converts to ``greater_than_or_equal("col_a", 1)``."""
    wanted = Expressions.greater_than_or_equal("col_a", 1)
    assert wanted == Expressions.convert_string_to_expr("col_a >= 1")
+
+
def test_lt():
    """``col_a < 1`` converts to ``Expressions.less_than("col_a", 1)``."""
    assert Expressions.less_than("col_a", 1) == Expressions.convert_string_to_expr("col_a < 1")
+
+
def test_lte():
    """``col_a <= 1`` converts to ``less_than_or_equal("col_a", 1)``."""
    expected_expr = Expressions.less_than_or_equal("col_a", 1)
    conv_expr = Expressions.convert_string_to_expr("col_a <= 1")
    # The stray trailing backslash (line continuation) after this assert
    # has been removed.
    assert expected_expr == conv_expr
+
+
def test_and():
    """Two equalities joined by ``and`` convert to a single ``and_``."""
    wanted = Expressions.and_(Expressions.equal("col_a", 1),
                              Expressions.equal("col_b", 2))
    assert wanted == Expressions.convert_string_to_expr("col_a=1 and col_b=2")
+
+
def test_or():
    """Two equalities joined by ``or`` convert to a single ``or_``."""
    wanted = Expressions.or_(Expressions.equal("col_a", 1),
                             Expressions.equal("col_b", 2))
    assert wanted == Expressions.convert_string_to_expr("col_a=1 or col_b=2")
+
+
def test_between():
    """``between`` converts to a ``>=`` / ``<=`` pair joined by ``and_``."""
    lower_bound = Expressions.greater_than_or_equal("col_a", 1)
    upper_bound = Expressions.less_than_or_equal("col_a", 2)
    wanted = Expressions.and_(lower_bound, upper_bound)
    assert wanted == Expressions.convert_string_to_expr("col_a between 1 and 2")
+
+
def test_is_null():
    """``col_a is null`` converts to ``Expressions.is_null("col_a")``."""
    assert Expressions.is_null("col_a") == Expressions.convert_string_to_expr("col_a is null")
+
+
def test_not_null():
    """``col_a is not null`` converts to ``Expressions.not_null("col_a")``."""
    wanted = Expressions.not_null("col_a")
    assert wanted == Expressions.convert_string_to_expr("col_a is not null")
+
+
def test_not():
    """``not col_a`` converts to ``Expressions.not_("col_a")``."""
    assert Expressions.not_("col_a") == Expressions.convert_string_to_expr("not col_a")
+
+
def test_not_equal():
    """``col_a <> 7`` converts to ``Expressions.not_equal("col_a", 7)``."""
    assert Expressions.not_equal("col_a", 7) == Expressions.convert_string_to_expr("col_a <> 7")
+
+
def test_not_equal_alt_syntax():
    """The ``!=`` spelling converts to the same inequality predicate."""
    assert Expressions.not_equal("col_a", 7) == Expressions.convert_string_to_expr("col_a != 7")
+
+
def test_compound_not_equal():
    """A negated, parenthesised equality converts to ``not_(equal(...))``."""
    wanted = Expressions.not_(Expressions.equal("col_a", 7))
    assert wanted == Expressions.convert_string_to_expr("not (col_a = 7)")
+
+
def test_ternary_condition():
    """Three ``and``-ed terms convert to a right-nested pair of ``and_``."""
    inner = Expressions.and_(Expressions.equal("col_b", 2),
                             Expressions.equal("col_c", 3))
    wanted = Expressions.and_(Expressions.equal("col_a", 1), inner)

    assert wanted == Expressions.convert_string_to_expr("col_a=1 and col_b=2 and col_c=3")
+
+
def test_precedence():
    """Check how a mixed ``or`` / ``and`` predicate is grouped.

    NOTE(review): the expected tree groups ``or`` tighter than ``and``,
    which differs from standard SQL precedence. Presumably this mirrors
    what the parser returns -- confirm before relying on it.
    """
    either = Expressions.or_(Expressions.equal("col_a", 1),
                             Expressions.equal("col_b", 2))
    wanted = Expressions.and_(either, Expressions.equal("col_c", 3))

    assert wanted == Expressions.convert_string_to_expr("col_a=1 or col_b=2 and col_c=3")
+
+
def test_precedence_with_between():
    """``between ... and ...`` keeps its own ``and`` when followed by ``or``."""
    in_range = Expressions.and_(Expressions.greater_than_or_equal("col_a", 1),
                                Expressions.less_than_or_equal("col_a", 2))
    wanted = Expressions.or_(in_range, Expressions.equal("col_c", 3))

    assert wanted == Expressions.convert_string_to_expr("col_a between 1 and 2 or col_c=3")
+
+
def test_complex_expansion():
    """A parenthesised three-way ``and`` combined with ``or ... is null``."""
    tail = Expressions.and_(Expressions.equal("b", 2),
                            Expressions.not_equal("c", 3))
    conjunction = Expressions.and_(Expressions.equal("a", 1), tail)
    wanted = Expressions.or_(conjunction, Expressions.is_null("d"))
    assert wanted == Expressions.convert_string_to_expr("(a=1 and b=2 and c<>3) or d is null")