You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2023/04/09 01:27:03 UTC

[GitHub] [arrow] paleolimbot commented on a diff in pull request #34972: GH-34971: [Format] Enhance C-Data API to support non-cpu cases

paleolimbot commented on code in PR #34972:
URL: https://github.com/apache/arrow/pull/34972#discussion_r1161175691


##########
cpp/src/arrow/c/abi.h:
##########
@@ -65,6 +65,69 @@ struct ArrowArray {
 
 #endif  // ARROW_C_DATA_INTERFACE
 
+#ifndef ARROW_C_DEVICE_DATA_INTERFACE
+#define ARROW_C_DEVICE_DATA_INTERFACE
+
+// ArrowDeviceType is compatible with dlpack DLDeviceType for portability
+// it uses the same values for each enum as the equivalent kDL<type> from dlpack.h
+#ifdef __cplusplus
+typedef enum : int32_t {
+#else
+typedef enum {
+#endif

Review Comment:
   It seems goofy that one might be able to get a different enum storage type depending on whether or not this is included in a C or C++ source file. The ADBC header specifically avoids enums and I wonder if it would be more predictable to `#define ARROW_DEVICE_TYPE_CPU 1` etc. and use `int32_t device_type` below.



##########
cpp/src/arrow/c/abi.h:
##########
@@ -65,6 +65,69 @@ struct ArrowArray {
 
 #endif  // ARROW_C_DATA_INTERFACE
 
+#ifndef ARROW_C_DEVICE_DATA_INTERFACE
+#define ARROW_C_DEVICE_DATA_INTERFACE
+
+// ArrowDeviceType is compatible with dlpack DLDeviceType for portability
+// it uses the same values for each enum as the equivalent kDL<type> from dlpack.h
+#ifdef __cplusplus
+typedef enum : int32_t {
+#else
+typedef enum {
+#endif
+  // CPU device, same as using ArrowArray directly
+  kArrowCPU = 1,
+  // CUDA GPU Device
+  kArrowCUDA = 2,
+  // Pinned CUDA CPU memory by cudaMallocHost
+  kArrowCUDAHost = 3,
+  // OpenCL Device
+  kArrowOpenCL = 4,
+  // Vulkan buffer for next-gen graphics
+  kArrowVulkan = 7,
+  // Metal for Apple GPU
+  kArrowMetal = 8,
+  // Verilog simulator buffer
+  kArrowVPI = 9,
+  // ROCm GPUs for AMD GPUs
+  kArrowROCM = 10,
+  // Pinned ROCm CPU memory allocated by hipMallocHost
+  kArrowROCMHost = 11,
+  // Reserved for extension
+  // used to quickly test extension devices,
+  // semantics can differ based on the implementation
+  kArrowExtDev = 12,
+  // CUDA managed/unified memory allocated by cudaMallocManaged
+  kArrowCUDAManaged = 13,
+  // unified shared memory allocated on a oneAPI non-partitioned
+  // device. call to oneAPI runtime is required to determine the
+  // device type, the USM allocation type and the sycl context it
+  // is bound to
+  kArrowOneAPI = 14,
+  // GPU support for next-gen WebGPU standard
+  kArrowWebGPU = 15,
+  // Qualcomm Hexagon DSP
+  kArrowHexagon = 16,
+} ArrowDeviceType;
+
+struct ArrowDeviceArray {
+  // the private_date and release callback of the arrow array
+  // should contain any necessary information and structures
+  // related to freeing the array according to the device it
+  // is allocated on, rather than having a separate release
+  // callback embedded here.
+  struct ArrowArray* array;
+  int device_id;

Review Comment:
   (Or if `int` is sufficient, `int32_t` would be more consistent with the existing C data interface)



##########
cpp/src/arrow/c/abi.h:
##########
@@ -106,6 +169,77 @@ struct ArrowArrayStream {
 
 #endif  // ARROW_C_STREAM_INTERFACE
 
+#ifndef ARROW_C_DEVICE_STREAM_INTERFACE
+#define ARROW_C_DEVICE_STREAM_INTERFACE
+
+struct ArrowDeviceArrayStream {
+  // The device that this stream produces data on.
+  // All ArrowDeviceArrays that are produced by this
+  // stream should have the same device_type as set
+  // here. The device_type needs to be provided here
+  // so that consumers can provide the correct type
+  // of stream_ptr when calling get_next.
+  ArrowDeviceType device_type;
+
+  // Callback to get the stream schema
+  // (will be the same for all arrays in the stream).
+  //
+  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+  //
+  // If successful, the ArrowSchema must be released independently from the stream.
+  int (*get_schema)(struct ArrowDeviceArrayStream*, struct ArrowSchema* out);
+
+  // Callback to get the device id for the next array.
+  // This is necessary so that the proper/correct stream pointer can be provided
+  // to get_next. The parameter provided must not be null.
+  //
+  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+  //
+  // The next call to `get_next` should provide an ArrowDeviceArray whose
+  // device_id matches what is provided here, and whose device_type is the
+  // same as the device_type member of this stream.
+  int (*get_next_device_id)(struct ArrowDeviceArrayStream*, int* out_device_id);
+
+  // Callback to get the next array
+  // (if no error and the array is released, the stream has ended)
+  //
+  // the provided stream_ptr should be the appropriate stream, or
+  // equivalent object, for the device that the data is allocated on
+  // to indicate where the consumer wants the data to be accessible.

Review Comment:
   Should this `void*` instead be a member of `ArrowDeviceArray`? It seems like that pointer might be useful to anybody attempting to access data in those buffers?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org