You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "zeroshade (via GitHub)" <gi...@apache.org> on 2023/06/19 23:27:39 UTC

[GitHub] [arrow] zeroshade commented on a diff in pull request #36142: GH-36141: [Go] Support large and fixed types in csv

zeroshade commented on code in PR #36142:
URL: https://github.com/apache/arrow/pull/36142#discussion_r1234552008


##########
go/arrow/csv/reader.go:
##########
@@ -783,6 +814,64 @@ func (r *Reader) parseList(field array.Builder, str string) {
 	}
 }
 
+func (r *Reader) parseLargeList(field array.Builder, str string) {
+	if r.isNull(str) {
+		field.AppendNull()
+		return
+	}
+	if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) {
+		r.err = errors.New("invalid list format. should start with '{' and end with '}'")
+		return
+	}
+	str = strings.Trim(str, "{}")
+	largeListBldr := field.(*array.LargeListBuilder)
+	largeListBldr.Append(true)
+	if len(str) == 0 {
+		// we don't want to create the csv reader if we already know the
+		// string is empty
+		return
+	}
+	valueBldr := largeListBldr.ValueBuilder()
+	reader := csv.NewReader(strings.NewReader(str))
+	items, err := reader.Read()
+	if err != nil {
+		r.err = err
+		return
+	}
+	for _, str := range items {
+		r.initFieldConverter(valueBldr)(str)
+	}
+}
+
+func (r *Reader) parseFixedSizeList(field array.Builder, str string) {
+	if r.isNull(str) {
+		field.AppendNull()
+		return
+	}
+	if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) {
+		r.err = errors.New("invalid list format. should start with '{' and end with '}'")
+		return
+	}
+	str = strings.Trim(str, "{}")
+	fixedSizeListBldr := field.(*array.FixedSizeListBuilder)
+	fixedSizeListBldr.Append(true)
+	if len(str) == 0 {
+		// we don't want to create the csv reader if we already know the
+		// string is empty
+		return
+	}
+	valueBldr := fixedSizeListBldr.ValueBuilder()
+	reader := csv.NewReader(strings.NewReader(str))
+	items, err := reader.Read()
+	if err != nil {
+		r.err = err
+		return
+	}

Review Comment:
   Add validation that the number of items matches the fixed size `n` of the list, and return an error if the wrong number of items is passed.



##########
go/arrow/csv/reader.go:
##########
@@ -796,6 +885,32 @@ func (r *Reader) parseBinaryType(field array.Builder, str string) {
 	field.(*array.BinaryBuilder).Append(decodedVal)
 }
 
+func (r *Reader) parseLargeBinaryType(field array.Builder, str string) {
+	// specialize the implementation when we know we cannot have nulls
+	if r.isNull(str) {
+		field.AppendNull()
+		return
+	}
+	decodedVal, err := base64.StdEncoding.DecodeString(str)
+	if err != nil {
+		panic("cannot decode base64 string " + str)
+	}
+	field.(*array.BinaryBuilder).Append(decodedVal)
+}
+
+func (r *Reader) parseFixedSizeBinaryType(field array.Builder, str string) {
+	// specialize the implementation when we know we cannot have nulls
+	if r.isNull(str) {
+		field.AppendNull()
+		return
+	}
+	decodedVal, err := base64.StdEncoding.DecodeString(str)
+	if err != nil {
+		panic("cannot decode base64 string " + str)
+	}
+	field.(*array.FixedSizeBinaryBuilder).Append(decodedVal)

Review Comment:
   `Append` will panic if the value is the wrong size. Can we check the size first to ensure it is the right length for the fixed-size binary type, and return an error (or truncate / pad) if it is incorrect?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org