You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "minyoung (via GitHub)" <gi...@apache.org> on 2023/04/18 00:30:22 UTC

[GitHub] [arrow] minyoung opened a new issue, #35202: [Go] Panic reading nested empty list

minyoung opened a new issue, #35202:
URL: https://github.com/apache/arrow/issues/35202

   ### Describe the bug, including details regarding any error messages, version, and platform.
   
   We have some data that is causing a panic to happen. I've stripped the schema and data as much as I could and created a test case that panics:
   
   ```golang
   // TestNestedEmptyList is a minimal reproduction: it builds a one-row struct
   // array shaped struct<root: struct<child1: list<struct<child2: list<struct<name>>>>>>,
   // where child1 holds a single element whose child2 list is empty, wraps it in
   // a one-column table, and passes that table to ps.roundTripTable.
   func (ps *ParquetIOTestSuite) TestNestedEmptyList() {
   	bldr := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf(
   		arrow.Field{
   			Name:     "root",
   			Type: arrow.StructOf(
   				arrow.Field{
   					Name:     "child1",
   					Type: arrow.ListOf(arrow.StructOf(
   						arrow.Field{
   							Name:     "child2",
   							Type: arrow.ListOf(arrow.StructOf(
   								arrow.Field{
   									Name:     "name",
   									Type:     arrow.BinaryTypes.String,
   								},
   							)),
   						},
   					)),
   				},
   			),
   		},
   	))
   	defer bldr.Release()
   
   	// Walk down the builder tree, outermost to innermost, so that each
   	// nesting level can be appended to individually.
   	rootBldr := bldr.FieldBuilder(0).(*array.StructBuilder)
   	child1Bldr := rootBldr.FieldBuilder(0).(*array.ListBuilder)
   	child1ElBldr := child1Bldr.ValueBuilder().(*array.StructBuilder)
   	child2Bldr := child1ElBldr.FieldBuilder(0).(*array.ListBuilder)
   
   	// target structure
   	// {
   	//   "root": {
   	//     "child1": [
   	//       { "child2": [] }
   	//     ]
   	//   }
   	// }
   
   	// One top-level row: a root struct whose child1 list is started.
   	bldr.Append(true)
   	rootBldr.Append(true)
   	child1Bldr.Append(true)
   
   	// One element inside child1. Its child2 list is started but nothing is
   	// ever appended to child2's value builder, giving the empty list shown
   	// in the target structure.
   	child1ElBldr.Append(true)
   	child2Bldr.Append(true)
   
   	arr := bldr.NewArray()
   	defer arr.Release()
   
   	// Wrap the array as the single nullable column "x" of a table.
   	field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
   	expected := array.NewTable(
   		arrow.NewSchema([]arrow.Field{field}, nil),
   		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{arr}))},
   		// NOTE(review): -1 presumably asks NewTable to infer the row count
   		// from the columns — confirm against the array.NewTable docs.
   		-1,
   	)
   	defer expected.Release()
   
   	// NOTE(review): roundTripTable is defined elsewhere in the suite;
   	// presumably it writes the table to Parquet and reads it back. The
   	// second argument's meaning is not visible here.
   	ps.roundTripTable(expected, false)
   }
   ```
   
   Test output
   ```
   $ go test ./parquet/pqarrow -run TestParquetArrowIO/TestNestedEmptyList
   panic: runtime error: index out of range [0] with length 0
   
   goroutine 41 [running]:
   github.com/apache/arrow/go/v11/parquet/internal/utils.NewFirstTimeBitmapWriter(...)
           /Users/minyoung/repo/arrow/go/parquet/internal/utils/bitmap_writer.go:83
   github.com/apache/arrow/go/v11/parquet/file.defLevelsToBitmapInternal({0x140003eca00, 0x1, 0x20}, {0x4f008e0?, 0x1?, 0x0?, 0x5108?}, 0x14000155160, 0xc8?)
           /Users/minyoung/repo/arrow/go/parquet/file/level_conversion.go:173 +0x1dc
   github.com/apache/arrow/go/v11/parquet/file.DefLevelsToBitmap(...)
           /Users/minyoung/repo/arrow/go/parquet/file/level_conversion.go:186
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*structReader).BuildArray(0x140003fdec0, 0x0)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:279 +0x148
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*listReader).BuildArray(0x1400040e140, 0x1)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:396 +0x334
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*structReader).BuildArray(0x14000414000, 0x1)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:289 +0x3b0
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*listReader).BuildArray(0x1400040e180, 0x1)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:396 +0x334
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*structReader).BuildArray(0x14000414120, 0x1)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:289 +0x3b0
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*structReader).BuildArray(0x14000414240, 0x1)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/column_readers.go:289 +0x3b0
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*ColumnReader).NextBatch(0x140003df490, 0x1400039c6b0?)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/file_reader.go:134 +0x68
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*FileReader).ReadColumn(0x1400039c780?, {0x140003dc8a8?, 0x14000125b60?, 0x0?}, 0x1400040e0c0?)
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/file_reader.go:247 +0x78
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*FileReader).ReadRowGroups.func1()
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/file_reader.go:341 +0xa4
   created by github.com/apache/arrow/go/v11/parquet/pqarrow.(*FileReader).ReadRowGroups
           /Users/minyoung/repo/arrow/go/parquet/pqarrow/file_reader.go:332 +0x27c
   FAIL    github.com/apache/arrow/go/v11/parquet/pqarrow  0.407s
   FAIL
   ```
   
   ### Component(s)
   
   Go


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] zeroshade commented on issue #35202: [Go] Panic reading nested empty list

Posted by "zeroshade (via GitHub)" <gi...@apache.org>.
zeroshade commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1527791936

   @minyoung Have you tested that version with the branch that fixes the panics in the PR? 
   
   When I run the sample code you've provided (or update the test in the branch to match) I'm not able to replicate the panic in the branch with the fixes that is attached to this issue.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] minyoung commented on issue #35202: [Go] Panic reading nested empty list

Posted by "minyoung (via GitHub)" <gi...@apache.org>.
minyoung commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1528136802

   @zeroshade I do still encounter a panic. I've created a [PR](https://github.com/apache/arrow/pull/35367) (based on the referenced branch) and the automated tests there also panic


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] zeroshade commented on issue #35202: [Go] Panic reading nested empty list

Posted by "zeroshade (via GitHub)" <gi...@apache.org>.
zeroshade commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1518261143

   @minyoung looks like you're building it just fine, I've figured out the issue and put up a PR to fix it which has been linked here. Please take a look and confirm that this fix works for your data that caused the original issue. (I used your example to add a unit test to the code in the PR which passes, but it would still be good to confirm the original cause is solved too)


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] minyoung commented on issue #35202: [Go] Panic reading nested empty list

Posted by "minyoung (via GitHub)" <gi...@apache.org>.
minyoung commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1512282863

   Could I be building an empty list incorrectly?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] minyoung commented on issue #35202: [Go] Panic reading nested empty list

Posted by "minyoung (via GitHub)" <gi...@apache.org>.
minyoung commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1535193347

   No worries @zeroshade, thanks for having a look. Yes, we no longer get a panic with our dataset from before 🎉
   Thanks so much!


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] zeroshade closed issue #35202: [Go] Panic reading nested empty list

Posted by "zeroshade (via GitHub)" <gi...@apache.org>.
zeroshade closed issue #35202: [Go] Panic reading nested empty list
URL: https://github.com/apache/arrow/issues/35202


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] zeroshade commented on issue #35202: [Go] Panic reading nested empty list

Posted by "zeroshade (via GitHub)" <gi...@apache.org>.
zeroshade commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1535157956

   @minyoung Sorry for the delay here, and thanks for providing that PR with the failing test. I've updated the pr #35276 and found the solution to that panic. Please give it a try to confirm it works with your data, or we can just continue our game of whack-a-panic :smile: 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow] minyoung commented on issue #35202: [Go] Panic reading nested empty list

Posted by "minyoung (via GitHub)" <gi...@apache.org>.
minyoung commented on issue #35202:
URL: https://github.com/apache/arrow/issues/35202#issuecomment-1526790176

   Thanks for having a look @zeroshade!
   
   Some of the panics are gone now 🎉 
   Unsure if related or not but we do get a panic with a slightly more populated version of our dataset:
   
   ```golang
   // TestNestedEmptyList is a more populated reproduction: every field in the
   // nested struct/list schema is nullable, and each of the 8 rows has a child1
   // list with two elements — the first with child2 = [{name: "foo"}], the
   // second with an empty child2. The resulting array is wrapped in a
   // one-column table and passed to ps.roundTripTable.
   func (ps *ParquetIOTestSuite) TestNestedEmptyList() {
   	bldr := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf(
   		arrow.Field{
   			Nullable: true,
   			Name:     "root",
   			Type: arrow.StructOf(
   				arrow.Field{
   					Nullable: true,
   					Name:     "child1",
   					Type: arrow.ListOf(arrow.StructOf(
   						arrow.Field{
   							Nullable: true,
   							Name:     "child2",
   							Type: arrow.ListOf(arrow.StructOf(
   								arrow.Field{
   									Nullable: true,
   									Name:     "name",
   									Type:     arrow.BinaryTypes.String,
   								},
   							)),
   						},
   					)),
   				},
   			),
   		},
   	))
   	defer bldr.Release()
   
   	// Walk down the builder tree, outermost to innermost, ending at the
   	// string builder for the leaf "name" field.
   	rootBldr := bldr.FieldBuilder(0).(*array.StructBuilder)
   	child1Bldr := rootBldr.FieldBuilder(0).(*array.ListBuilder)
   	child1ElBldr := child1Bldr.ValueBuilder().(*array.StructBuilder)
   	child2Bldr := child1ElBldr.FieldBuilder(0).(*array.ListBuilder)
   	leafBldr := child2Bldr.ValueBuilder().(*array.StructBuilder)
   	nameBldr := leafBldr.FieldBuilder(0).(*array.StringBuilder)
   
   	// target structure 8 times
   	// {
   	//   "root": {
   	//     "child1": [
   	//       { "child2": [{ "name": "foo" }] },
   	//       { "child2": [] }
   	//     ]
   	//   }
   	// }
   
   	for i := 0; i < 8; i++ {
   		// Start a new top-level row and its child1 list.
   		bldr.Append(true)
   		rootBldr.Append(true)
   		child1Bldr.Append(true)
   
   		// First child1 element: child2 holds one struct with name "foo".
   		child1ElBldr.Append(true)
   		child2Bldr.Append(true)
   		leafBldr.Append(true)
   		nameBldr.Append("foo")
   
   		// Second child1 element: child2 is started but receives no values,
   		// so it is the empty list from the target structure.
   		child1ElBldr.Append(true)
   		child2Bldr.Append(true)
   	}
   
   	arr := bldr.NewArray()
   	defer arr.Release()
   
   	// Wrap the array as the single nullable column "x" of a table.
   	field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
   	expected := array.NewTable(
   		arrow.NewSchema([]arrow.Field{field}, nil),
   		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{arr}))},
   		// NOTE(review): -1 presumably asks NewTable to infer the row count
   		// from the columns — confirm against the array.NewTable docs.
   		-1,
   	)
   	defer expected.Release()
   
   	// NOTE(review): roundTripTable is defined elsewhere in the suite;
   	// presumably it writes the table to Parquet and reads it back. The
   	// second argument's meaning is not visible here.
   	ps.roundTripTable(expected, false)
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org