You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2019/08/19 23:43:57 UTC

[incubator-iceberg] branch master updated: Bringing type module into sync (#382)

This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 7acf141  Bringing type module into sync (#382)
7acf141 is described below

commit 7acf14193544e2ba529bdd53f5e7bcf3dbe9b559
Author: TGooch44 <te...@gmail.com>
AuthorDate: Mon Aug 19 16:43:51 2019 -0700

    Bringing type module into sync (#382)
---
 python/iceberg/api/types/conversions.py | 14 ++++----
 python/iceberg/api/types/type.py        |  7 +---
 python/iceberg/api/types/type_util.py   | 62 +++++++++++++++++++--------------
 python/iceberg/api/types/types.py       | 28 +++++++++------
 4 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/python/iceberg/api/types/conversions.py b/python/iceberg/api/types/conversions.py
index a8bc3f5..6768b61 100644
--- a/python/iceberg/api/types/conversions.py
+++ b/python/iceberg/api/types/conversions.py
@@ -82,20 +82,18 @@ class Conversions(object):
 
     @staticmethod
     def to_byte_buffer(type_var, value):
-        byte_buf_func = Conversions.to_byte_buff_mapping.get(type_var.type_id)
-        if byte_buf_func is None:
+        try:
+            return Conversions.to_byte_buff_mapping.get(type_var.type_id)(type_var, value)
+        except KeyError:
             raise RuntimeError("Cannot Serialize Type: %s" % type_var)
 
-        return byte_buf_func(type_var, value)
-
     @staticmethod
     def from_byte_buffer(type_var, buffer_var):
         return Conversions.internal_from_byte_buffer(type_var, buffer_var)
 
     @staticmethod
     def internal_from_byte_buffer(type_var, buffer_var):
-        byte_buf_func = Conversions.from_byte_buff_mapping.get(type_var.type_id)
-        if byte_buf_func is None:
+        try:
+            return Conversions.from_byte_buff_mapping[type_var.type_id](type_var.type_id, buffer_var)
+        except KeyError:
             raise RuntimeError("Cannot Serialize Type: %s" % type_var)
-
-        return byte_buf_func(type_var.type_id, buffer_var)
diff --git a/python/iceberg/api/types/type.py b/python/iceberg/api/types/type.py
index c855637..367e366 100644
--- a/python/iceberg/api/types/type.py
+++ b/python/iceberg/api/types/type.py
@@ -81,12 +81,7 @@ class Type(object):
 class PrimitiveType(Type):
 
     def __eq__(self, other):
-        if id(self) == id(other):
-            return True
-        elif other is None or not isinstance(other, PrimitiveType):
-            return False
-
-        return True
+        return type(self) == type(other)
 
     def __ne__(self, other):
         return not self.__eq__(other)
diff --git a/python/iceberg/api/types/type_util.py b/python/iceberg/api/types/type_util.py
index 7898262..7bc8dd7 100644
--- a/python/iceberg/api/types/type_util.py
+++ b/python/iceberg/api/types/type_util.py
@@ -52,9 +52,9 @@ def select(schema, field_ids):
         return schema
     elif result is not None:
         if schema.get_aliases() is not None:
-            return iceberg.api.schema.Schema(result.as_nested_type(), schema.get_aliases())
+            return iceberg.api.schema.Schema(result.as_nested_type().fields, schema.get_aliases())
         else:
-            return iceberg.api.schema.Schema(result.as_nested_type())
+            return iceberg.api.schema.Schema(result.as_nested_type().fields)
 
     return iceberg.api.schema.Schema(list(), schema.get_aliases())
 
@@ -161,6 +161,11 @@ def visit_custom_order(arg, visitor):
                 results.append(VisitFieldFuture(field, visitor))
             struct = visitor.struct(struct, [x.get() for x in results])
             return struct
+        elif type_var.type_id == TypeID.LIST:
+            list_var = type_var.as_nested_type().as_list_type()
+            return visitor.list(list_var, VisitFuture(list_var.element_type, visitor))
+        elif type_var.type_id == TypeID.MAP:
+            raise NotImplementedError()
 
         return visitor.primitive(type_var.as_primitive_type())
 
@@ -172,22 +177,22 @@ class SchemaVisitor(object):
         self.field_ids = list()
 
     def schema(self, schema, struct_result):
-        return NotImplementedError()
+        return None
 
     def struct(self, struct, field_results):
-        return NotImplementedError()
+        return None
 
     def field(self, field, field_result):
-        return NotImplementedError()
+        return None
 
     def list(self, list_var, element_result):
-        return NotImplementedError()
+        return None
 
     def map(self, map_var, key_result, value_result):
-        return NotImplementedError()
+        return None
 
     def primitive(self, primitive_var):
-        return NotImplementedError()
+        return None
 
 
 class CustomOrderSchemaVisitor(object):
@@ -195,22 +200,22 @@ class CustomOrderSchemaVisitor(object):
         super(CustomOrderSchemaVisitor, self).__init__()
 
     def schema(self, schema, struct_result):
-        return NotImplementedError()
+        return None
 
     def struct(self, struct, field_results):
-        return NotImplementedError()
+        return None
 
     def field(self, field, field_result):
-        return NotImplementedError()
+        return None
 
     def list(self, list_var, element_result):
-        return NotImplementedError()
+        return None
 
     def map(self, map_var, key_result, value_result):
-        return NotImplementedError()
+        return None
 
     def primitive(self, primitive_var):
-        return NotImplementedError()
+        return None
 
 
 class VisitFuture(object):
@@ -245,7 +250,7 @@ class GetProjectedIds(SchemaVisitor):
 
     def __init__(self):
         super(GetProjectedIds, self).__init__()
-        self.field_ids = set()
+        self.field_ids = list()
 
     def schema(self, schema, struct_result):
         return self.field_ids
@@ -254,22 +259,22 @@ class GetProjectedIds(SchemaVisitor):
         return self.field_ids
 
     def field(self, field, field_result):
-        if field_result is not None:
-            self.field_ids.add(field.field_id)
+        if field_result is None:
+            self.field_ids.append(field.field_id)
 
         return self.field_ids
 
     def list(self, list_var, element_result):
         if element_result is None:
-            for field in list_var.fields:
-                self.field_ids.add(field.field_id)
+            for field in list_var.fields():
+                self.field_ids.append(field.field_id)
 
         return self.field_ids
 
     def map(self, map_var, key_result, value_result):
         if value_result is None:
-            for field in map_var.fields:
-                self.field_ids.add(field.field_id)
+            for field in map_var.fields():
+                self.field_ids.append(field.field_id)
 
         return self.field_ids
 
@@ -278,7 +283,7 @@ class PruneColumns(SchemaVisitor):
 
     def __init__(self, selected):
         super(PruneColumns, self).__init__()
-        self.selected = selected
+        self.selected = list(selected)
 
     def schema(self, schema, struct_result):
         return struct_result
@@ -288,10 +293,10 @@ class PruneColumns(SchemaVisitor):
         selected_fields = list()
         same_types = True
 
-        for i, field in enumerate(field_results):
-            projected_type = field_results[i]
+        for i, projected_type in enumerate(field_results):
+            field = fields[i]
             if projected_type is not None:
-                if field.type_id == projected_type.type_id:
+                if field.type == projected_type:
                     selected_fields.append(field)
                 elif projected_type is not None:
                     same_types = False
@@ -316,6 +321,9 @@ class PruneColumns(SchemaVisitor):
         elif field_result is not None:
             return field_result
 
+    def primitive(self, primitive_var):
+        return None
+
 
 class IndexByName(SchemaVisitor):
 
@@ -416,9 +424,9 @@ class AssignFreshIds(CustomOrderSchemaVisitor):
     def list(self, list_var, element_result):
         new_id = self.next_id()
         if list_var.is_element_optional():
-            return ListType.of_optional(new_id, element_result())
+            return ListType.of_optional(new_id, element_result.get())
         else:
-            return ListType.of_required(new_id, element_result())
+            return ListType.of_required(new_id, element_result.get())
 
     def map(self, map_var, key_result, value_result):
         new_key_id = self.next_id()
diff --git a/python/iceberg/api/types/types.py b/python/iceberg/api/types/types.py
index 225bdee..941c9bc 100644
--- a/python/iceberg/api/types/types.py
+++ b/python/iceberg/api/types/types.py
@@ -375,10 +375,10 @@ class DecimalType(PrimitiveType):
         return DecimalType(precison, scale)
 
     def __init__(self, precision, scale):
-        if precision > 38:
+        if int(precision) > 38:
             raise RuntimeError("Decimals with precision larger than 38 are not supported: %s", precision)
-        self.precision = precision
-        self.scale = scale
+        self.precision = int(precision)
+        self.scale = int(scale)
 
     @property
     def type_id(self):
@@ -410,18 +410,19 @@ class DecimalType(PrimitiveType):
 
 class NestedField():
     @staticmethod
-    def optional(id, name, type_var):
-        return NestedField(True, id, name, type_var)
+    def optional(id, name, type_var, doc=None):
+        return NestedField(True, id, name, type_var, doc=doc)
 
     @staticmethod
-    def required(id, name, type):
-        return NestedField(False, id, name, type)
+    def required(id, name, type, doc=None):
+        return NestedField(False, id, name, type, doc=doc)
 
-    def __init__(self, is_optional, id, name, type):
+    def __init__(self, is_optional, id, name, type, doc=None):
         self.is_optional = is_optional
         self.id = id
         self.name = name
         self.type = type
+        self.doc = doc
 
     @property
     def is_required(self):
@@ -432,7 +433,11 @@ class NestedField():
         return self.id
 
     def __repr__(self):
-        return "%s: %s: %s %s" % (self.id, self.name, "optional" if self.is_optional else "required", self.type)
+        return "%s: %s: %s %s(%s)" % (self.id,
+                                      self.name,
+                                      "optional" if self.is_optional else "required",
+                                      self.type,
+                                      self.doc)
 
     def __str__(self):
         return self.__repr__()
@@ -445,7 +450,8 @@ class NestedField():
 
         return self.is_optional == other.is_optional \
             and self.id == other.id \
-            and self.name == other.name and self.type == other.type
+            and self.name == other.name and self.type == other.type \
+            and self.doc == other.doc
 
     def __ne__(self, other):
         return not self.__eq__(other)
@@ -455,7 +461,7 @@ class NestedField():
 
     def __key(self):
         type_name = self.type.type_id.name
-        return NestedField.__class__, self.is_optional, self.id, self.name, type_name
+        return NestedField.__class__, self.is_optional, self.id, self.name, self.doc, type_name
 
 
 class StructType(NestedType):