You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2022/06/02 01:47:58 UTC

[GitHub] [beam] youngoli commented on a diff in pull request #16818: [BEAM-13806] Add x-lang BigQuery IO integration test to Go SDK.

youngoli commented on code in PR #16818:
URL: https://github.com/apache/beam/pull/16818#discussion_r887430144


##########
sdks/go/test/integration/io/xlang/bigquery/bigquery.go:
##########
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package bigquery contains integration tests for cross-language BigQuery IO transforms.
+package bigquery
+
+import (
+	"math/rand"
+	"reflect"
+	"strings"
+	"time"
+
+	"github.com/apache/beam/sdks/v2/go/pkg/beam"
+	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/bigqueryio"
+	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
+)
+
+func init() {
+	beam.RegisterFunction(createTestRows)
+	beam.RegisterType(reflect.TypeOf((*TestRow)(nil)))
+	beam.RegisterType(reflect.TypeOf((*RandData)(nil)))
+}
+
+const (
+	// A text to shuffle to get random words.
+	text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo efficitur. Fusce in nisi nunc."
+	// Number of random elements to create for test. Must not exceed the number of words in text.
+	inputSize = 50
+)
+
+// TestRow is a sample row to write and read from that is expected to contain enough deterministic
+// and random data in different data types to provide a reasonable signal that reading and writing
+// works at a basic level.
+type TestRow struct {
+	Counter   int64    `beam:"counter"`   // A deterministic counter, increments for each row generated.
+	Rand_data RandData `beam:"rand_data"` // An inner struct containing randomized data.
+}
+
+func shuffleText() []string {
+	words := strings.Fields(text)
+	rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] })
+	return words
+}
+
+// RandData is a struct of various types of random data.
+type RandData struct {
+	Flip bool   `beam:"flip"` // Flip is a bool with a random chance of either result (a coin flip).
+	Num  int64  `beam:"num"`  // Num is a random int64.
+	Word string `beam:"word"` // Word is a randomly selected word from a sample text.
+}
+
+// ddlTestRowSchema is a BigQuery data definition language (DDL) schema string that corresponds to TestRow.
+const ddlTestRowSchema = "counter INT64 NOT NULL, " +
+	"rand_data STRUCT<" +
+	"flip BOOL NOT NULL," +
+	"num INT64 NOT NULL," +
+	"word STRING NOT NULL" +
+	"> NOT NULL"
+
+// createTestRows creates a number of TestRows, populating the randomized data.
+func createTestRows(_ []byte, emit func(TestRow)) {
+	rand.Seed(time.Now().UnixNano())
+	words := shuffleText()
+	for i := 0; i < inputSize; i++ {
+		emit(TestRow{
+			Counter: int64(i),
+			Rand_data: RandData{
+				Flip: rand.Int63n(2) != 0,
+				Num:  rand.Int63(),
+				Word: words[i],
+			},
+		})
+	}
+}
+
+func WritePipeline(expansionAddr, table string, createFn interface{}) *beam.Pipeline {
+	p := beam.NewPipeline()
+	s := p.Root()
+
+	// Generate elements and write to table.
+	rows := beam.ParDo(s, createFn, beam.Impulse(s))
+	bigqueryio.Write(s, table, rows,
+		bigqueryio.CreateDisposition(bigqueryio.CreateNever),
+		bigqueryio.WriteExpansionAddr(expansionAddr))
+
+	return p
+}
+
+func ReadPipeline(expansionAddr, table string, createFn interface{}) *beam.Pipeline {
+	p := beam.NewPipeline()
+	s := p.Root()
+
+	// Read from table and compare to generated elements.
+	rows := beam.ParDo(s, createFn, beam.Impulse(s))
+	inType := reflect.TypeOf((*TestRow)(nil)).Elem()
+	readRows := bigqueryio.Read(s, inType,
+		bigqueryio.FromTable(table),

Review Comment:
   I did. As an update to what we talked about online, I'm not getting any permissions errors testing locally. (I am getting a different unrelated error, but it's because of a bug in how I wrote the test. Will commit once I fix that.)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org