You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lo...@apache.org on 2022/07/03 23:51:42 UTC
[beam] branch master updated: Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)
This is an automated email from the ASF dual-hosted git repository.
lostluck pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 385ee7f8dac Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)
385ee7f8dac is described below
commit 385ee7f8dacecc4f3117d81032b2b491ee4ef6ca
Author: Red Daly <re...@gmail.com>
AuthorDate: Sun Jul 3 16:51:35 2022 -0700
Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp (#21943)
* Go SDK: Update memfs to parse the List() pattern as a glob, not a regexp.
This makes the memfs implementation consistent with the filesystem
implementation of List().
The docstring for filesystem.Interface.List does not specify how the pattern
should be interpretted. It says: "List expands a pattern to a list of
filenames." Perhaps that docstring should be updated to be more specific.
* Use "filepath" instead of "path" in memfs; add test cases.
Also, accept input without a "memfs://" prefix.
---
sdks/go/pkg/beam/io/filesystem/memfs/memory.go | 15 ++--
.../go/pkg/beam/io/filesystem/memfs/memory_test.go | 88 ++++++++++++++++++++--
2 files changed, 91 insertions(+), 12 deletions(-)
diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
index 951605a2401..782ae4c1694 100644
--- a/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
+++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory.go
@@ -19,10 +19,11 @@ package memfs
import (
"bytes"
"context"
+ "fmt"
"io"
"io/ioutil"
"os"
- "regexp"
+ "path/filepath"
"sort"
"strings"
"sync"
@@ -54,14 +55,16 @@ func (f *fs) List(_ context.Context, glob string) ([]string, error) {
f.mu.Lock()
defer f.mu.Unlock()
- pattern, err := regexp.Compile(glob)
- if err != nil {
- return nil, err
- }
+ // As with other functions, the memfs:// prefix is optional.
+ globNoScheme := strings.TrimPrefix(glob, "memfs://")
var ret []string
for k := range f.m {
- if pattern.MatchString(k) {
+ matched, err := filepath.Match(globNoScheme, strings.TrimPrefix(k, "memfs://"))
+ if err != nil {
+ return nil, fmt.Errorf("invalid glob pattern: %w", err)
+ }
+ if matched {
ret = append(ret, k)
}
}
diff --git a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
index b2b39b232e7..3944ba72456 100644
--- a/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
+++ b/sdks/go/pkg/beam/io/filesystem/memfs/memory_test.go
@@ -18,6 +18,7 @@ package memfs
import (
"context"
"os"
+ "path/filepath"
"testing"
"github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem"
@@ -105,7 +106,7 @@ func TestList(t *testing.T) {
t.Fatalf("Write(%q) error = %v", name, err)
}
}
- glob := "memfs://foo.*"
+ glob := "memfs://foo*"
got, err := fs.List(ctx, glob)
if err != nil {
t.Errorf("error List(%q) = %v", glob, err)
@@ -117,6 +118,81 @@ func TestList(t *testing.T) {
}
}
+func TestListTable(t *testing.T) {
+ ctx := context.Background()
+ for _, tt := range []struct {
+ name string
+ files []string
+ pattern string
+ want []string
+ wantErr bool
+ }{
+ {
+ name: "foo-star",
+ files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+ pattern: "memfs://foo*",
+ want: []string{"memfs://foo", "memfs://foobar"},
+ },
+ {
+ name: "foo-star-missing-memfs-prefix",
+ files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+ pattern: "foo*",
+ want: []string{"memfs://foo", "memfs://foobar"},
+ },
+ {
+ name: "bad-pattern",
+ files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+ pattern: "foo[",
+ wantErr: true, // invalid glob syntax
+ },
+ {
+ name: "foo",
+ files: []string{"fizzbuzz", "foo", "foobar", "baz", "bazfoo"},
+ pattern: "memfs://foo",
+ want: []string{"memfs://foo"},
+ },
+ {
+ name: "dirs",
+ files: []string{
+ "fizzbuzz",
+ filepath.Join("xyz", "12"),
+ filepath.Join("xyz", "1234"),
+ filepath.Join("xyz", "1235"),
+ "foobar",
+ "baz",
+ "bazfoo",
+ },
+ pattern: "memfs://xyz/123*",
+ want: []string{
+ "memfs://" + filepath.Join("xyz", "1234"),
+ "memfs://" + filepath.Join("xyz", "1235"),
+ },
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ fs := &fs{m: make(map[string][]byte)}
+
+ for _, name := range tt.files {
+ if err := filesystem.Write(ctx, fs, name, []byte("contents")); err != nil {
+ t.Fatalf("Write(%q) error = %v", name, err)
+ }
+ }
+ got, err := fs.List(ctx, tt.pattern)
+ if gotErr := err != nil; gotErr != tt.wantErr {
+ t.Errorf("List(%q) got error %v, wantErr = %v", tt.pattern, err, tt.wantErr)
+ }
+ if err != nil {
+ return
+ }
+
+ want := tt.want
+ if d := cmp.Diff(want, got); d != "" {
+ t.Errorf("List(%q) resulted in unexpected diff (-want, +got):\n %s", tt.pattern, d)
+ }
+ })
+ }
+}
+
func TestRemove(t *testing.T) {
ctx := context.Background()
fs := &fs{m: make(map[string][]byte)}
@@ -133,14 +209,14 @@ func TestRemove(t *testing.T) {
t.Errorf("error Remove(%q) = %v", toremove, err)
}
- got, err := fs.List(ctx, ".*")
+ got, err := fs.List(ctx, "memfs://*")
if err != nil {
- t.Errorf("error List(\".*\") = %v", err)
+ t.Errorf("error List(\"*\") = %v", err)
}
want := []string{"memfs://bazfoo", "memfs://fizzbuzz"}
if d := cmp.Diff(want, got); d != "" {
- t.Errorf("After Remove fs.List(\".*\") = %v, want %v", got, want)
+ t.Errorf("After Remove fs.List(\"*\") = %v, want %v", got, want)
}
}
@@ -156,7 +232,7 @@ func TestCopy(t *testing.T) {
if err := filesystem.Copy(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil {
t.Fatalf("Copy() error = %v", err)
}
- glob := "memfs://fizz.*"
+ glob := "memfs://fizz*"
got, err := fs.List(ctx, glob)
if err != nil {
t.Errorf("error List(%q) = %v", glob, err)
@@ -188,7 +264,7 @@ func TestRename(t *testing.T) {
if err := filesystem.Rename(ctx, fs, "memfs://fizzbuzz", "memfs://fizzbang"); err != nil {
t.Fatalf("Rename() error = %v", err)
}
- glob := "memfs://fizz.*"
+ glob := "memfs://fizz*"
got, err := fs.List(ctx, glob)
if err != nil {
t.Errorf("error List(%q) = %v", glob, err)