You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/23 13:21:59 UTC

[GitHub] [arrow] rok commented on a change in pull request #8510: ARROW-1614: [C++] Add a Tensor logical value type with constant dimensions, implemented using ExtensionType

rok commented on a change in pull request #8510:
URL: https://github.com/apache/arrow/pull/8510#discussion_r510879292



##########
File path: python/pyarrow/tests/test_extension_type.py
##########
@@ -76,6 +76,95 @@ def __reduce__(self):
         return MyListType, (self.storage_type,)
 
 
+def _tensor_to_array(obj, dtype):
+    batch_size = obj.shape[0]
+    element_shape = obj.shape[1:]
+    total_num_elements = obj.size
+    num_elements = 1 if len(obj.shape) == 1 else np.prod(element_shape)
+
+    child_buf = pa.py_buffer(obj)
+    child_array = pa.Array.from_buffers(
+        dtype, total_num_elements, [None, child_buf])
+
+    offset_buf = pa.py_buffer(
+        np.int32([i * num_elements for i in range(batch_size + 1)]))
+
+    storage = pa.Array.from_buffers(pa.list_(dtype), batch_size,
+                                    [None, offset_buf], children=[child_array])
+
+    typ = TensorType(element_shape, dtype)
+    return pa.ExtensionArray.from_storage(typ, storage)
+
+
+class TensorArray(pa.ExtensionArray):
+    """
+    Concrete class for Arrow arrays of Tensor data type.
+    """
+
+    @classmethod
+    def from_numpy(cls, obj):
+        """
+        Convert a single contiguous numpy.ndarray to TensorArray.
+        """
+        assert isinstance(obj, np.ndarray)
+        if not obj.flags.c_contiguous:
+            obj = np.ascontiguousarray(obj)
+        dtype = pa.from_numpy_dtype(obj.dtype)
+
+        return _tensor_to_array(obj, dtype)
+
+    @classmethod
+    def from_tensor(cls, obj):
+        """
+        Convert a single contiguous pyarrow.Tensor to a TensorArray.
+        """
+        assert isinstance(obj, pa.Tensor)
+        assert obj.is_contiguous
+        dtype = obj.type
+
+        return _tensor_to_array(obj, dtype)
+
+    def to_numpy(self):
+        """
+        Convert TensorArray to numpy.ndarray.
+        """
+        shape = (len(self),) + self.type.shape
+        buf = self.storage.buffers()[3]
+        storage_list_type = self.storage.type
+        ext_dtype = storage_list_type.value_type.to_pandas_dtype()
+
+        return np.ndarray(shape, buffer=buf, dtype=ext_dtype)
+
+    def to_tensor(self):
+        """
+        Convert TensorArray to pyarrow.Tensor.
+        """
+        return pa.Tensor.from_numpy(self.to_numpy())
+
+
+class TensorType(pa.PyExtensionType):

Review comment:
       Hey @BryanCutler. Indeed, this is meant only for discussing the design before implementing it in C++.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org