You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2022/03/03 15:56:49 UTC

[GitHub] [beam] AydarZaynutdinov commented on a change in pull request #17000: [BEAM-13881] [Playground] Increase test coverage for the utils package

AydarZaynutdinov commented on a change in pull request #17000:
URL: https://github.com/apache/beam/pull/17000#discussion_r818792895



##########
File path: playground/backend/internal/utils/common_test.go
##########
@@ -15,7 +15,51 @@
 
 package utils
 
-import "testing"
+import (
+	"fmt"
+	"github.com/google/uuid"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+const (
+	sourceDir         = "sourceDir"
+	fileName          = "file.txt"
+	fileContent       = "content"
+	javaFileName      = "javaFileName.java"
+	pythonExampleName = "wordCount.py"
+	wordCountPython   = "import argparse\nimport logging\nimport re\n\nimport apache_beam as beam\nfrom apache_beam.io import ReadFromText\nfrom apache_beam.io import WriteToText\nfrom apache_beam.options.pipeline_options import PipelineOptions\nfrom apache_beam.options.pipeline_options import SetupOptions\n\n\nclass WordExtractingDoFn(beam.DoFn):\n  \"\"\"Parse each line of input text into words.\"\"\"\n  def process(self, element):\n    \"\"\"Returns an iterator over the words of this element.\n\n    The element is a line of text.  If the line is blank, note that, too.\n\n    Args:\n      element: the element being processed\n\n    Returns:\n      The processed element.\n    \"\"\"\n    return re.findall(r'[\\w\\']+', element, re.UNICODE)\n\n\ndef run(argv=None, save_main_session=True):\n  \"\"\"Main entry point; defines and runs the wordcount pipeline.\"\"\"\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\n      '--input',\n      dest='input',\n      default='gs://data
 flow-samples/shakespeare/kinglear.txt',\n      help='Input file to process.')\n  parser.add_argument(\n      '--output',\n      dest='output',\n      required=True,\n      help='Output file to write results to.')\n  known_args, pipeline_args = parser.parse_known_args(argv)\n\n  # We use the save_main_session option because one or more DoFn's in this\n  # workflow rely on global context (e.g., a module imported at module level).\n  pipeline_options = PipelineOptions(pipeline_args)\n  pipeline_options.view_as(SetupOptions).save_main_session = save_main_session\n\n  # The pipeline will be run on exiting the with block.\n  with beam.Pipeline(options=pipeline_options) as p:\n\n    # Read the text file[pattern] into a PCollection.\n    lines = p | 'Read' >> ReadFromText(known_args.input)\n\n    counts = (\n        lines\n        | 'Split' >> (beam.ParDo(WordExtractingDoFn()).with_output_types(str))\n        | 'PairWithOne' >> beam.Map(lambda x: (x, 1))\n        | 'GroupAndSum' >> beam.Com
 binePerKey(sum))\n\n    # Format the counts into a PCollection of strings.\n    def format_result(word, count):\n      return '%s: %d' % (word, count)\n\n    output = counts | 'Format' >> beam.MapTuple(format_result)\n\n    # Write the output using a \"Write\" transform that has side effects.\n    # pylint: disable=expression-not-assigned\n    output | 'Write' >> WriteToText(known_args.output)\n\n\nif __name__ == '__main__':\n  logging.getLogger().setLevel(logging.INFO)\n  run()"
+	javaCode          = "package org.apache.beam.examples;\n\n// beam-playground:\n//   name: MinimalWordCount\n//   description: An example that counts words in Shakespeare's works.\n//   multifile: false\n//   default_example: true\n//   context_line: 71\n//   categories:\n//     - Combiners\n//     - Filtering\n//     - IO\n//     - Core Transforms\n//     - Quickstart\n\nimport java.util.Arrays;\nimport org.apache.beam.sdk.Pipeline;\nimport org.apache.beam.sdk.io.TextIO;\nimport org.apache.beam.sdk.options.PipelineOptions;\nimport org.apache.beam.sdk.options.PipelineOptionsFactory;\nimport org.apache.beam.sdk.transforms.Count;\nimport org.apache.beam.sdk.transforms.Filter;\nimport org.apache.beam.sdk.transforms.FlatMapElements;\nimport org.apache.beam.sdk.transforms.MapElements;\nimport org.apache.beam.sdk.values.KV;\nimport org.apache.beam.sdk.values.TypeDescriptors;\n\n/**\n * An example that counts words in Shakespeare.\n *\n * <p>This class, {@link MinimalWordCount}, is the fir
 st in a series of four successively more\n * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or\n * argument processing, and focus on construction of the pipeline, which chains together the\n * application of core transforms.\n *\n * <p>Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the\n * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional\n * concepts.\n *\n * <p>Concepts:\n *\n * <pre>\n *   1. Reading data from text files\n *   2. Specifying 'inline' transforms\n *   3. Counting items in a PCollection\n *   4. Writing data to text files\n * </pre>\n *\n * <p>No arguments are required to run this pipeline. It will be executed with the DirectRunner. You\n * can see the results in the output files in your current working directory, with names like\n * \"wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate\n * file service
 .\n */\npublic class MinimalWordCount {\n\n  public static void main(String[] args) {\n\n    // Create a PipelineOptions object. This object lets us set various execution\n    // options for our pipeline, such as the runner you wish to use. This example\n    // will run with the DirectRunner by default, based on the class path configured\n    // in its dependencies.\n    PipelineOptions options = PipelineOptionsFactory.create();\n\n    // In order to run your pipeline, you need to make following runner specific changes:\n    //\n    // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner\n    // or FlinkRunner.\n    // CHANGE 2/3: Specify runner-required options.\n    // For BlockingDataflowRunner, set project and temp location as follows:\n    //   DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);\n    //   dataflowOptions.setRunner(BlockingDataflowRunner.class);\n    //   dataflowOptions.setProject(\"SET_YOUR_PROJECT_ID_HERE\");\n    
 //   dataflowOptions.setTempLocation(\"gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY\");\n    // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions}\n    // for more details.\n    //   options.as(FlinkPipelineOptions.class)\n    //      .setRunner(FlinkRunner.class);\n\n    // Create the Pipeline object with the options we defined above\n    Pipeline p = Pipeline.create(options);\n\n    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set\n    // of input text files. TextIO.Read returns a PCollection where each element is one line from\n    // the input text (a set of Shakespeare's texts).\n\n    // This example reads from a public dataset containing the text of King Lear.\n    p.apply(TextIO.read().from(\"gs://apache-beam-samples/shakespeare/kinglear.txt\"))\n\n        // Concept #2: Apply a FlatMapElements transform the PCollection of text lines.\n        // This transform splits the lines in PCollection<String
 >, where each element is an\n        // individual word in Shakespeare's collected texts.\n        .apply(\n            FlatMapElements.into(TypeDescriptors.strings())\n                .via((String line) -> Arrays.asList(line.split(\"[^\\\\p{L}]+\"))))\n        // We use a Filter transform to avoid empty word\n        .apply(Filter.by((String word) -> !word.isEmpty()))\n        // Concept #3: Apply the Count transform to our PCollection of individual words. The Count\n        // transform returns a new PCollection of key/value pairs, where each key represents a\n        // unique word in the text. The associated value is the occurrence count for that word.\n        .apply(Count.perElement())\n        // Apply a MapElements transform that formats our PCollection of word counts into a\n        // printable string, suitable for writing to an output file.\n        .apply(\n            MapElements.into(TypeDescriptors.strings())\n                .via(\n                    (KV<String, Lon
 g> wordCount) ->\n                        wordCount.getKey() + \": \" + wordCount.getValue()))\n        // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.\n        // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of\n        // formatted strings) to a series of text files.\n        //\n        // By default, it will write to a set of files with names like wordcounts-00001-of-00005\n        .apply(TextIO.write().to(\"wordcounts\"));\n\n    p.run().waitUntilFinish();\n  }\n}"
+	filePermission    = 0600
+)
+
+func TestMain(m *testing.M) {
+	err := setup()
+	if err != nil {
+		panic(fmt.Errorf("error during test setup: %s", err.Error()))
+	}
+	defer teardown()
+	m.Run()
+}
+
+func setup() error {
+	err := os.Mkdir(sourceDir, 0755)

Review comment:
       Please extract `0755` into a named constant.

##########
File path: playground/backend/internal/utils/preparares_utils_test.go
##########
@@ -50,3 +60,347 @@ func TestSpacesToEqualsOption(t *testing.T) {
 		})
 	}
 }
+
+func TestInitVars(t *testing.T) {
+	tests := []struct {
+		name  string
+		want  string
+		want1 string
+		want2 error
+		want3 bool
+		want4 PipelineDefinitionType

Review comment:
       Maybe replace this one with a slice?

##########
File path: playground/backend/internal/utils/preparares_utils_test.go
##########
@@ -50,3 +60,347 @@ func TestSpacesToEqualsOption(t *testing.T) {
 		})
 	}
 }
+
+func TestInitVars(t *testing.T) {
+	tests := []struct {
+		name  string
+		want  string
+		want1 string
+		want2 error
+		want3 bool
+		want4 PipelineDefinitionType
+	}{
+		{
+			name:  "Create empty variables",
+			want:  EmptyLine,
+			want1: EmptyLine,
+			want2: errors.New(EmptyLine),
+			want3: false,
+			want4: RegularDefinition,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, got1, got2, got3, got4 := InitVars()
+			if got != tt.want {
+				t.Errorf("InitVars() got = %v, want %v", got, tt.want)
+			}
+			if got1 != tt.want1 {
+				t.Errorf("InitVars() got1 = %v, want %v", got1, tt.want1)
+			}
+			if !reflect.DeepEqual(got2, tt.want2) {
+				t.Errorf("InitVars() got2 = %v, want %v", got2, tt.want2)
+			}
+			if got3 != tt.want3 {
+				t.Errorf("InitVars() got3 = %v, want %v", got3, tt.want3)
+			}
+			if got4 != tt.want4 {
+				t.Errorf("InitVars() got4 = %v, want %v", got4, tt.want4)
+			}
+		})
+	}
+}
+
+func TestAddGraphToEndOfFile(t *testing.T) {
+	txtFilePath := filepath.Join(sourceDir, fileName)
+	txtFile, err := os.OpenFile(txtFilePath, os.O_APPEND|os.O_WRONLY, os.ModeAppend)
+	if err != nil {
+		panic(err)
+	}
+	defer txtFile.Close()
+	incorrectFile, err := os.Open(txtFilePath)
+	if err != nil {
+		panic(err)
+	}
+	defer incorrectFile.Close()
+	type args struct {
+		spaces       string
+		err          error
+		tempFile     *os.File
+		pipelineName string
+	}
+	type fields struct {
+		fileContent string
+		filePath    string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		fields  fields
+		wantErr bool
+	}{
+		{
+			name: "Add graph to the end of an existing file",
+			args: args{
+				spaces:       "",
+				err:          nil,
+				tempFile:     txtFile,
+				pipelineName: uuid.New().String(),
+			},
+			fields: fields{
+				fileContent: fileContent,
+				filePath:    txtFilePath,
+			},
+			wantErr: false,
+		},
+		{
+			name: "Error during write data to file",
+			args: args{
+				spaces:       "",
+				err:          nil,
+				tempFile:     incorrectFile,
+				pipelineName: uuid.New().String(),
+			},
+			fields: fields{
+				fileContent: fileContent,
+				filePath:    txtFilePath,
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			AddGraphToEndOfFile(tt.args.spaces, tt.args.err, tt.args.tempFile, tt.args.pipelineName)
+			data, err := os.ReadFile(tt.fields.filePath)
+			if err != nil {
+				t.Errorf("AddGraphToEndOfFile() error during reading from a file = %v", err)
+			}
+			graphCode := fmt.Sprintf(pythonGraphCodePattern, tt.args.pipelineName, GraphFileName)
+			graphCodeWithIndentation := strings.ReplaceAll(graphCode, indentationReplacement, tt.args.spaces)
+			fileContentWithGraph := fileContent + "\n" + graphCodeWithIndentation
+			if (string(data) != fileContentWithGraph) != tt.wantErr {
+				t.Error("AddGraphToEndOfFile() wrong graph addition")
+			}
+		})
+	}
+}
+
+func TestGetPublicClassName(t *testing.T) {
+	javaPublicClassNamePattern := "public class (.*?) [{|implements(.*)]"
+	type args struct {
+		filePath string
+		pattern  string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    string
+		wantErr bool
+	}{
+		{
+			name: "Get public class name from existing java file",
+			args: args{
+				filePath: filepath.Join(sourceDir, javaFileName),
+				pattern:  javaPublicClassNamePattern,
+			},
+			want:    "MinimalWordCount",
+			wantErr: false,
+		},
+		{
+			name: "Get public class name from non-existent file",
+			args: args{
+				filePath: filepath.Join(sourceDir, "file1.java"),
+				pattern:  javaPublicClassNamePattern,
+			},
+			want:    "",
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := GetPublicClassName(tt.args.filePath, tt.args.pattern)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetPublicClassName() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("GetPublicClassName() got = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAddNewLine(t *testing.T) {
+	javaFile, err := os.OpenFile(filepath.Join(sourceDir, javaFileName), os.O_APPEND|os.O_WRONLY, os.ModeAppend)
+	if err != nil {
+		panic(err)
+	}
+	defer javaFile.Close()
+	txtFile, err := os.Open(filepath.Join(sourceDir, fileName))
+	if err != nil {
+		panic(err)
+	}
+	defer txtFile.Close()
+	type args struct {
+		newLine bool
+		file    *os.File
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "With newLine = false",
+			args: args{
+				newLine: false,
+				file:    nil,
+			},
+			wantErr: false,
+		},
+		{
+			name: "Add a new line to an existing javaFile",
+			args: args{
+				newLine: true,
+				file:    javaFile,
+			},
+			wantErr: false,
+		},
+		{
+			name: "Error during write data to file",
+			args: args{
+				newLine: true,
+				file:    txtFile,
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := AddNewLine(tt.args.newLine, tt.args.file); (err != nil) != tt.wantErr {
+				t.Errorf("AddNewLine() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+func TestProcessLine(t *testing.T) {
+	pipelineName := uuid.New().String()
+	pythonExample, err := os.OpenFile(filepath.Join(sourceDir, pythonExampleName), os.O_RDWR, 0755)
+	if err != nil {
+		panic(err)
+	}
+	defer pythonExample.Close()
+	findPipelinePattern := `^(\s*)(.+) = beam.Pipeline`
+	findWithPipelinePattern := `(\s*)with.+Pipeline.+as (.+):`
+	emptyLine := EmptyLine
+
+	type args struct {
+		curLine      string
+		pipelineName *string
+		spaces       *string
+		regs         *[]*regexp.Regexp
+		tempFile     *os.File
+		err          error
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    bool
+		want1   PipelineDefinitionType

Review comment:
       Try not to use `want`, `want1`-style naming.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org