You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2019/08/19 23:43:57 UTC
[incubator-iceberg] branch master updated: Bringing type module into sync (#382)
This is an automated email from the ASF dual-hosted git repository.
dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 7acf141 Bringing type module into sync (#382)
7acf141 is described below
commit 7acf14193544e2ba529bdd53f5e7bcf3dbe9b559
Author: TGooch44 <te...@gmail.com>
AuthorDate: Mon Aug 19 16:43:51 2019 -0700
Bringing type module into sync (#382)
---
python/iceberg/api/types/conversions.py | 14 ++++----
python/iceberg/api/types/type.py | 7 +---
python/iceberg/api/types/type_util.py | 62 +++++++++++++++++++--------------
python/iceberg/api/types/types.py | 28 +++++++++------
4 files changed, 59 insertions(+), 52 deletions(-)
diff --git a/python/iceberg/api/types/conversions.py b/python/iceberg/api/types/conversions.py
index a8bc3f5..6768b61 100644
--- a/python/iceberg/api/types/conversions.py
+++ b/python/iceberg/api/types/conversions.py
@@ -82,20 +82,18 @@ class Conversions(object):
@staticmethod
def to_byte_buffer(type_var, value):
- byte_buf_func = Conversions.to_byte_buff_mapping.get(type_var.type_id)
- if byte_buf_func is None:
+ try:
+ return Conversions.to_byte_buff_mapping.get(type_var.type_id)(type_var, value)
+ except KeyError:
raise RuntimeError("Cannot Serialize Type: %s" % type_var)
- return byte_buf_func(type_var, value)
-
@staticmethod
def from_byte_buffer(type_var, buffer_var):
return Conversions.internal_from_byte_buffer(type_var, buffer_var)
@staticmethod
def internal_from_byte_buffer(type_var, buffer_var):
- byte_buf_func = Conversions.from_byte_buff_mapping.get(type_var.type_id)
- if byte_buf_func is None:
+ try:
+ return Conversions.from_byte_buff_mapping[type_var.type_id](type_var.type_id, buffer_var)
+ except KeyError:
raise RuntimeError("Cannot Serialize Type: %s" % type_var)
-
- return byte_buf_func(type_var.type_id, buffer_var)
diff --git a/python/iceberg/api/types/type.py b/python/iceberg/api/types/type.py
index c855637..367e366 100644
--- a/python/iceberg/api/types/type.py
+++ b/python/iceberg/api/types/type.py
@@ -81,12 +81,7 @@ class Type(object):
class PrimitiveType(Type):
def __eq__(self, other):
- if id(self) == id(other):
- return True
- elif other is None or not isinstance(other, PrimitiveType):
- return False
-
- return True
+ return type(self) == type(other)
def __ne__(self, other):
return not self.__eq__(other)
diff --git a/python/iceberg/api/types/type_util.py b/python/iceberg/api/types/type_util.py
index 7898262..7bc8dd7 100644
--- a/python/iceberg/api/types/type_util.py
+++ b/python/iceberg/api/types/type_util.py
@@ -52,9 +52,9 @@ def select(schema, field_ids):
return schema
elif result is not None:
if schema.get_aliases() is not None:
- return iceberg.api.schema.Schema(result.as_nested_type(), schema.get_aliases())
+ return iceberg.api.schema.Schema(result.as_nested_type().fields, schema.get_aliases())
else:
- return iceberg.api.schema.Schema(result.as_nested_type())
+ return iceberg.api.schema.Schema(result.as_nested_type().fields)
return iceberg.api.schema.Schema(list(), schema.get_aliases())
@@ -161,6 +161,11 @@ def visit_custom_order(arg, visitor):
results.append(VisitFieldFuture(field, visitor))
struct = visitor.struct(struct, [x.get() for x in results])
return struct
+ elif type_var.type_id == TypeID.LIST:
+ list_var = type_var.as_nested_type().as_list_type()
+ return visitor.list(list_var, VisitFuture(list_var.element_type, visitor))
+ elif type_var.type_id == TypeID.MAP:
+ raise NotImplementedError()
return visitor.primitive(type_var.as_primitive_type())
@@ -172,22 +177,22 @@ class SchemaVisitor(object):
self.field_ids = list()
def schema(self, schema, struct_result):
- return NotImplementedError()
+ return None
def struct(self, struct, field_results):
- return NotImplementedError()
+ return None
def field(self, field, field_result):
- return NotImplementedError()
+ return None
def list(self, list_var, element_result):
- return NotImplementedError()
+ return None
def map(self, map_var, key_result, value_result):
- return NotImplementedError()
+ return None
def primitive(self, primitive_var):
- return NotImplementedError()
+ return None
class CustomOrderSchemaVisitor(object):
@@ -195,22 +200,22 @@ class CustomOrderSchemaVisitor(object):
super(CustomOrderSchemaVisitor, self).__init__()
def schema(self, schema, struct_result):
- return NotImplementedError()
+ return None
def struct(self, struct, field_results):
- return NotImplementedError()
+ return None
def field(self, field, field_result):
- return NotImplementedError()
+ return None
def list(self, list_var, element_result):
- return NotImplementedError()
+ return None
def map(self, map_var, key_result, value_result):
- return NotImplementedError()
+ return None
def primitive(self, primitive_var):
- return NotImplementedError()
+ return None
class VisitFuture(object):
@@ -245,7 +250,7 @@ class GetProjectedIds(SchemaVisitor):
def __init__(self):
super(GetProjectedIds, self).__init__()
- self.field_ids = set()
+ self.field_ids = list()
def schema(self, schema, struct_result):
return self.field_ids
@@ -254,22 +259,22 @@ class GetProjectedIds(SchemaVisitor):
return self.field_ids
def field(self, field, field_result):
- if field_result is not None:
- self.field_ids.add(field.field_id)
+ if field_result is None:
+ self.field_ids.append(field.field_id)
return self.field_ids
def list(self, list_var, element_result):
if element_result is None:
- for field in list_var.fields:
- self.field_ids.add(field.field_id)
+ for field in list_var.fields():
+ self.field_ids.append(field.field_id)
return self.field_ids
def map(self, map_var, key_result, value_result):
if value_result is None:
- for field in map_var.fields:
- self.field_ids.add(field.field_id)
+ for field in map_var.fields():
+ self.field_ids.append(field.field_id)
return self.field_ids
@@ -278,7 +283,7 @@ class PruneColumns(SchemaVisitor):
def __init__(self, selected):
super(PruneColumns, self).__init__()
- self.selected = selected
+ self.selected = list(selected)
def schema(self, schema, struct_result):
return struct_result
@@ -288,10 +293,10 @@ class PruneColumns(SchemaVisitor):
selected_fields = list()
same_types = True
- for i, field in enumerate(field_results):
- projected_type = field_results[i]
+ for i, projected_type in enumerate(field_results):
+ field = fields[i]
if projected_type is not None:
- if field.type_id == projected_type.type_id:
+ if field.type == projected_type:
selected_fields.append(field)
elif projected_type is not None:
same_types = False
@@ -316,6 +321,9 @@ class PruneColumns(SchemaVisitor):
elif field_result is not None:
return field_result
+ def primitive(self, primitive_var):
+ return None
+
class IndexByName(SchemaVisitor):
@@ -416,9 +424,9 @@ class AssignFreshIds(CustomOrderSchemaVisitor):
def list(self, list_var, element_result):
new_id = self.next_id()
if list_var.is_element_optional():
- return ListType.of_optional(new_id, element_result())
+ return ListType.of_optional(new_id, element_result.get())
else:
- return ListType.of_required(new_id, element_result())
+ return ListType.of_required(new_id, element_result.get())
def map(self, map_var, key_result, value_result):
new_key_id = self.next_id()
diff --git a/python/iceberg/api/types/types.py b/python/iceberg/api/types/types.py
index 225bdee..941c9bc 100644
--- a/python/iceberg/api/types/types.py
+++ b/python/iceberg/api/types/types.py
@@ -375,10 +375,10 @@ class DecimalType(PrimitiveType):
return DecimalType(precison, scale)
def __init__(self, precision, scale):
- if precision > 38:
+ if int(precision) > 38:
raise RuntimeError("Decimals with precision larger than 38 are not supported: %s", precision)
- self.precision = precision
- self.scale = scale
+ self.precision = int(precision)
+ self.scale = int(scale)
@property
def type_id(self):
@@ -410,18 +410,19 @@ class DecimalType(PrimitiveType):
class NestedField():
@staticmethod
- def optional(id, name, type_var):
- return NestedField(True, id, name, type_var)
+ def optional(id, name, type_var, doc=None):
+ return NestedField(True, id, name, type_var, doc=doc)
@staticmethod
- def required(id, name, type):
- return NestedField(False, id, name, type)
+ def required(id, name, type, doc=None):
+ return NestedField(False, id, name, type, doc=doc)
- def __init__(self, is_optional, id, name, type):
+ def __init__(self, is_optional, id, name, type, doc=None):
self.is_optional = is_optional
self.id = id
self.name = name
self.type = type
+ self.doc = doc
@property
def is_required(self):
@@ -432,7 +433,11 @@ class NestedField():
return self.id
def __repr__(self):
- return "%s: %s: %s %s" % (self.id, self.name, "optional" if self.is_optional else "required", self.type)
+ return "%s: %s: %s %s(%s)" % (self.id,
+ self.name,
+ "optional" if self.is_optional else "required",
+ self.type,
+ self.doc)
def __str__(self):
return self.__repr__()
@@ -445,7 +450,8 @@ class NestedField():
return self.is_optional == other.is_optional \
and self.id == other.id \
- and self.name == other.name and self.type == other.type
+ and self.name == other.name and self.type == other.type \
+ and self.doc == other.doc
def __ne__(self, other):
return not self.__eq__(other)
@@ -455,7 +461,7 @@ class NestedField():
def __key(self):
type_name = self.type.type_id.name
- return NestedField.__class__, self.is_optional, self.id, self.name, type_name
+ return NestedField.__class__, self.is_optional, self.id, self.name, self.doc, type_name
class StructType(NestedType):