You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2023/02/02 15:51:18 UTC

[GitHub] [arrow] paleolimbot commented on pull request #33925: GH-33923: [Docs] Tensor canonical extension type specification

paleolimbot commented on PR #33925:
URL: https://github.com/apache/arrow/pull/33925#issuecomment-1413961867

   This looks awesome!
   
   Here is a hacked-together R implementation (it worked great!):
   
   <details>
   
   ```
   library(R6)
   library(rlang)
   library(arrow, warn.conflicts = FALSE)
   #> Some features are not enabled in this build of Arrow. Run `arrow_info()` for more information.
   
   # R6 extension type class used for "arrow.fixed_shape_tensor" arrays.
   # Storage is a FixedSizeList whose list size equals prod(shape); the shape
   # itself is carried in the JSON extension metadata under the key "shape".
   FixedShapeTensor <- R6Class(
     "FixedShapeTensor",
     inherit = ExtensionType,
     public = list(
       # Validate the storage type and the JSON metadata, then cache the
       # tensor shape in private$shape. Stops with an error if the storage is
       # not a FixedSizeList, if "shape" is missing from the metadata, or if
       # prod(shape) does not match the storage list size.
       deserialize_instance = function() {
         storage <- self$storage_type()
         if (!inherits(storage, "FixedSizeListType")) {
           stop("storage for a FixedShapeTensor must be a FixedSizeList")
         }

         parsed <- jsonlite::fromJSON(self$extension_metadata_utf8())
         if (is.null(parsed$shape)) {
           stop("Key 'shape' missing in JSON metadata")
         }

         shape <- as.integer(parsed$shape)
         if (prod(shape) != storage$list_size) {
           stop(
             sprintf(
               "FixedShapeTensor of shape %s must have storage with fixed sized list[%s]",
               paste(shape, collapse = ", "),
               prod(shape)
             )
           )
         }

         # Only cache the shape once the metadata has been fully validated.
         private$shape <- shape
       },
       # Convert an extension array to a list with one R array per element,
       # each having dim equal to the cached shape.
       as_vector = function(extension_array) {
         result <- super$as_vector(extension_array)
         shape <- private$shape
         # R's arrays are column-major, whereas the values produced by
         # fixed_shape_tensor_array() are laid out in row-major order, so each
         # element is read with reversed dims and then transposed back.
         lapply(result, function(x) {
           # NOTE(review): null elements presumably arrive here as NULL;
           # setting dim on NULL is an error, so pass them through untouched.
           if (is.null(x)) {
             return(NULL)
           }
           dim(x) <- rev(shape)
           # aperm() returns an array of dim `shape`; assigning through x[]
           # copies its values while keeping any other attributes of x.
           x[] <- aperm(x, rev(seq_along(shape)))
           dim(x) <- shape
           x
         })
       }
     ),
     private = list(
       # Integer vector holding the tensor shape parsed from the metadata.
       shape = NULL
     )
   )
   
   # Build a FixedShapeTensor extension type.
   #
   # value_type: Arrow type of the individual tensor values.
   # shape: tensor dimensions; coerced to integer.
   #
   # The storage type is a fixed-size list of prod(shape) values and the shape
   # is serialized as JSON into the extension metadata.
   fixed_shape_tensor <- function(value_type, shape) {
     dims <- as.integer(shape)
     n_values <- prod(dims)
     metadata <- jsonlite::toJSON(list(shape = dims))

     new_extension_type(
       storage_type = fixed_size_list_of(value_type, n_values),
       extension_name = "arrow.fixed_shape_tensor",
       extension_metadata = metadata,
       type_class = FixedShapeTensor
     )
   }
   
   # Wrap a single R array (or plain vector) in a length-one FixedShapeTensor
   # extension array.
   #
   # x: an R matrix/array; a vector without a dim attribute is treated as a
   #   one-dimensional tensor of shape length(x).
   # value_type: Arrow type for the values; inferred from x when NULL.
   #
   # Returns the extension array created by new_extension_array().
   fixed_shape_tensor_array <- function(x, value_type = NULL) {
     shape <- dim(x)
     if (is.null(shape)) {
       # aperm() rejects objects without dims; a plain vector is already flat.
       shape <- length(x)
     } else {
       # R stores arrays column-major: transposing all axes and then dropping
       # dims leaves the values flattened in row-major order.
       x[] <- aperm(x, rev(seq_along(shape)))
       dim(x) <- NULL
     }

     if (is.null(value_type)) {
       # A zero-length slice is enough to infer the Arrow type.
       value_type <- infer_type(x[integer(0)])
     }

     storage <- as_arrow_array(
       list(x),
       type = fixed_size_list_of(value_type, list_size = prod(shape))
     )

     new_extension_array(
       storage,
       fixed_shape_tensor(value_type, shape)
     )
   }
   
   # Demo: create a 2 x 3 int32 tensor type; its metadata carries the shape.
   (type <- fixed_shape_tensor(int32(), c(2, 3)))
   #> FixedShapeTensor
   #> FixedShapeTensor <{"shape":[2,3]}>
   
   # A 2 x 3 R matrix used as the round-trip input.
   (r_matrix <- matrix(1:6, nrow = 2, ncol = 3))
   #>      [,1] [,2] [,3]
   #> [1,]    1    3    5
   #> [2,]    2    4    6
   # Converting to an extension array stores the values row by row
   # (1, 3, 5, 2, 4, 6).
   (array <- fixed_shape_tensor_array(r_matrix))
   #> ExtensionArray
   #> <FixedShapeTensor <{"shape":[2,3]}>>
   #> [
   #>   [
   #>     1,
   #>     3,
   #>     5,
   #>     2,
   #>     4,
   #>     6
   #>   ]
   #> ]
   # Converting back yields a list whose single element equals r_matrix.
   array$as_vector()
   #> [[1]]
   #>      [,1] [,2] [,3]
   #> [1,]    1    3    5
   #> [2,]    2    4    6
   ```
   
   </details>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org