You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lo...@apache.org on 2022/07/03 23:51:42 UTC

[beam] branch master updated: Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)

This is an automated email from the ASF dual-hosted git repository.

lostluck pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 385ee7f8dac Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)
385ee7f8dac is described below

commit 385ee7f8dacecc4f3117d81032b2b491ee4ef6ca
Author: Red Daly <re...@gmail.com>
AuthorDate: Sun Jul 3 16:51:35 2022 -0700

    Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)
    
    * Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp.
    
    This makes the memfs implementation consistent with the filesystem
    implementation of List().
    
    The docstring for filesystem.Interface.List does not specify how the pattern
    should be interpretted. It says: "List expands a pattern to a list of
    filenames." Perhaps that docstring should be updated to be more specific.
    
    * Use "filepath" instead of "path" in memfs; add test cases.
    
    Also, accept input without a "memfs://" prefix.
---
 sdks/go/pkg/beam/io/filesystem/memfs/memory.go     | 15 ++--
 .../go/pkg/beam/io/filesystem/memfs/memory_test.go | 88 ++++++++++++++++++++--
 2 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
index 951605a2401..782ae4c1694 100644
--- a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
+++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
@@ -19,10 +19,11 @@ package memfs
 import (
 	"bytes"
 	"context"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
-	"regexp"
+	"path/filepath"
 	"sort"
 	"strings"
 	"sync"
@@ -54,14 +55,16 @@ func (f *fs) List(_ context.Context, glob string) ([]string, error) {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 
-	pattern, err := regexp.Compile(glob)
-	if err != nil {
-		return nil, err
-	}
+	// As with other functions, the memfs:// prefix is optional.
+	globNoScheme := strings.TrimPrefix(glob, "memfs://")
 
 	var ret []string
 	for k := range f.m {
-		if pattern.MatchString(k) {
+		matched, err := filepath.Match(globNoScheme, strings.TrimPrefix(k, "memfs://"))
+		if err != nil {
+			return nil, fmt.Errorf("invalid glob pattern: %w", err)
+		}
+		if matched {
 			ret = append(ret, k)
 		}
 	}
diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
index b2b39b232e7..3944ba72456 100644
--- a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
+++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
@@ -18,6 +18,7 @@ package memfs
 import (
 	"context"
 	"os"
+	"path/filepath"
 	"testing"
 
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem"
@@ -105,7 +106,7 @@ func TestList(t *testing.T) {
 			t.Fatalf("Write(%q) error = %v", name, err)
 		}
 	}
-	glob := "memfs://foo.*"
+	glob := "memfs://foo*"
 	got, err := fs.List(ctx, glob)
 	if err != nil {
 		t.Errorf("error List(%q) = %v", glob, err)
@@ -117,6 +118,81 @@ func TestList(t *testing.T) {
 	}
 }
 
+func TestListTable(t *testing.T) {
+	ctx := context.Background()
+	for _, tt := range []struct {
+		name    string
+		files   []string
+		pattern string
+		want    []string
+		wantErr bool
+	}{
+		{
+			name:    "foo-star",
+			files:   []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+			pattern: "memfs://foo*",
+			want:    []string{"memfs://foo", "memfs://foobar"},
+		},
+		{
+			name:    "foo-star-missing-memfs-prefix",
+			files:   []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+			pattern: "foo*",
+			want:    []string{"memfs://foo", "memfs://foobar"},
+		},
+		{
+			name:    "bad-pattern",
+			files:   []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+			pattern: "foo[",
+			wantErr: true, // invalid glob syntax
+		},
+		{
+			name:    "foo",
+			files:   []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+			pattern: "memfs://foo",
+			want:    []string{"memfs://foo"},
+		},
+		{
+			name: "dirs",
+			files: []string{
+				"fizzbuzz",
+				filepath.Join("xyz", "12"),
+				filepath.Join("xyz", "1234"),
+				filepath.Join("xyz", "1235"),
+				"foobar",
+				"baz",
+				"bazfoo",
+			},
+			pattern: "memfs://xyz/123*",
+			want: []string{
+				"memfs://" + filepath.Join("xyz", "1234"),
+				"memfs://" + filepath.Join("xyz", "1235"),
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			fs := &fs{m: make(map[string][]byte)}
+
+			for _, name := range tt.files {
+				if err := filesystem.Write(ctx, fs, name, []byte("contents")); err != nil {
+					t.Fatalf("Write(%q) error = %v", name, err)
+				}
+			}
+			got, err := fs.List(ctx, tt.pattern)
+			if gotErr := err != nil; gotErr != tt.wantErr {
+				t.Errorf("List(%q) got error %v, wantErr = %v", tt.pattern, err, tt.wantErr)
+			}
+			if err != nil {
+				return
+			}
+
+			want := tt.want
+			if d := cmp.Diff(want, got); d != "" {
+				t.Errorf("List(%q) resulted in unexpected diff (-want, +got):\n  %s", tt.pattern, d)
+			}
+		})
+	}
+}
+
 func TestRemove(t *testing.T) {
 	ctx := context.Background()
 	fs := &fs{m: make(map[string][]byte)}
@@ -133,14 +209,14 @@ func TestRemove(t *testing.T) {
 		t.Errorf("error Remove(%q) = %v", toremove, err)
 	}
 
-	got, err := fs.List(ctx, ".*")
+	got, err := fs.List(ctx, "memfs://*")
 	if err != nil {
-		t.Errorf("error List(\".*\") = %v", err)
+		t.Errorf("error List(\"*\") = %v", err)
 	}
 
 	want := []string{"memfs://bazfoo", "memfs://fizzbuzz"}
 	if d := cmp.Diff(want, got); d != "" {
-		t.Errorf("After Remove fs.List(\".*\") = %v, want %v", got, want)
+		t.Errorf("After Remove fs.List(\"*\") = %v, want %v", got, want)
 	}
 }
 
@@ -156,7 +232,7 @@ func TestCopy(t *testing.T) {
 	if err := filesystem.Copy(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil {
 		t.Fatalf("Copy() error = %v", err)
 	}
-	glob := "memfs://fizz.*"
+	glob := "memfs://fizz*"
 	got, err := fs.List(ctx, glob)
 	if err != nil {
 		t.Errorf("error List(%q) = %v", glob, err)
@@ -188,7 +264,7 @@ func TestRename(t *testing.T) {
 	if err := filesystem.Rename(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil {
 		t.Fatalf("Rename() error = %v", err)
 	}
-	glob := "memfs://fizz.*"
+	glob := "memfs://fizz*"
 	got, err := fs.List(ctx, glob)
 	if err != nil {
 		t.Errorf("error List(%q) = %v", glob, err)