You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "hermanschaaf (via GitHub)" <gi...@apache.org> on 2023/03/24 13:07:13 UTC

[GitHub] [arrow] hermanschaaf commented on a diff in pull request #34719: GH-34690: [Go]: Add sort indices function

hermanschaaf commented on code in PR #34719:
URL: https://github.com/apache/arrow/pull/34719#discussion_r1147556172


##########
go/arrow/compute/internal/kernels/vector_sort.go:
##########
@@ -0,0 +1,87 @@
+package kernels
+
+import (
+	"bytes"
+	"sort"
+
+	"github.com/apache/arrow/go/v12/arrow"
+	"github.com/apache/arrow/go/v12/arrow/array"
+	"github.com/apache/arrow/go/v12/arrow/compute/internal/exec"
+	"github.com/apache/arrow/go/v12/arrow/memory"
+)
+
+type SortIndicesOptions struct {
+	NullPlacement NullPlacement `compute:"null_placement"`
+}
+
+func (s *SortIndicesOptions) TypeName() string {
+	return "SortIndicesOptions"
+}
+
+type Order int
+
+const (
+	Ascending Order = iota
+	Descending
+)
+
+type NullPlacement int
+
+const (
+	AtStart NullPlacement = iota
+	AtEnd
+)
+
+func GetVectorSortingKernels() []exec.VectorKernel {
+	var base exec.VectorKernel
+	base.CanExecuteChunkWise = true
+	base.OutputChunked = false
+	outType := exec.NewOutputType(arrow.ListOf(arrow.PrimitiveTypes.Int64))
+	kernels := make([]exec.VectorKernel, 0)
+	for _, ty := range primitiveTypes {
+		base.Signature = &exec.KernelSignature{
+			InputTypes: []exec.InputType{exec.NewExactInput(ty)},
+			OutType:    outType,
+		}
+		base.ExecFn = sortExec
+		kernels = append(kernels, base)
+	}
+	return kernels
+}
+
+func sortExec(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
+	// Get the input array from the batch
+	inExecVal := batch.Values[0]
+	inArr := inExecVal.Array
+
+	// Create a slice of indices, initialized to [0, 1, 2, ..., n-1]
+	indices := make([]int, inArr.Len)
+	for i := range indices {
+		indices[i] = i
+	}
+
+	sz := inArr.Type.(arrow.FixedWidthDataType).Bytes()
+
+	// Sort the indices slice based on the values in the input array
+	sort.Slice(indices, func(i, j int) bool {
+		// TODO: not sure what to do here?
+		// compare using scalar comparison?
+		a := inArr.Buffers[1].Buf[indices[i]*sz : (indices[i]+1)*sz]
+		b := inArr.Buffers[1].Buf[indices[j]*sz : (indices[j]+1)*sz]
+		return bytes.Compare(a, b) < 0
+	})

Review Comment:
   @zeroshade So I've gotten this far, but the `bytes.Compare` comparison here was just to get something compiling. 😃  In reality we need to compare the scalar values represented by those bytes, I believe. (I've also only tried to make it work for int32 so far.)
   
   I'm not sure how to compare scalar values inside the array with one another? Are there existing comparators that can be used her? And is there a way to access individual items in an array, other than how I'm doing it here? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org