You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@skywalking.apache.org by ke...@apache.org on 2022/06/10 13:57:10 UTC

[skywalking-eyes] 01/01: Add custom license urls for identification

This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch urls
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit 7bf440603ef8345148fcb6d4fdc53829334311bb
Author: kezhenxu94 <ke...@apache.org>
AuthorDate: Sun May 22 16:29:38 2022 +0800

    Add custom license urls for identification
---
 README.md                      |  2 +-
 assets/urls.yaml               | 40 +++++++++++++++++++++++++++
 pkg/deps/golang.go             | 17 +++++++-----
 pkg/deps/jar.go                | 58 +++++++++++++++++++++++++--------------
 pkg/deps/maven.go              | 62 ++++++++++++++++++++++++++----------------
 pkg/deps/npm.go                | 16 +++++++----
 pkg/deps/resolve.go            |  1 +
 pkg/license/identifier.go      | 26 +++++++++++++++++-
 pkg/license/identifier_test.go | 30 ++++++++++++++++++++
 9 files changed, 192 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 49dd4cb..ef366de 100644
--- a/README.md
+++ b/README.md
@@ -788,7 +788,7 @@ dependency: # <15>
 15. The `dependency` section is configurations for resolving dependencies' licenses.
 16. The `files` are the files that declare the dependencies of a project, typically, `go.mod` in Go project, `pom.xml` in maven project, and `package.json` in NodeJS project. If it's a relative path, it's relative to the `.licenserc.yaml`.
 17. Declare the licenses which cannot be identified by this tool.
-18. The `name` of the dependency, The name is different for different projects, `PackagePath` in Go project, `GroupID:ArtifactID` in maven project, `PackageName` in NodeJS project.
+18. The `name` of the dependency. The name differs between project types: `PackagePath` in Go projects, the `.jar` file name in Maven projects, and `PackageName` in NodeJS projects.
 19. The `version` of the dependency, it's locked, preventing license changed between different versions.
 20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license.
 21. The minimum percentage of the file that must contain license text for identifying a license, default is `75`.
diff --git a/assets/urls.yaml b/assets/urls.yaml
new file mode 100644
index 0000000..f5f34c9
--- /dev/null
+++ b/assets/urls.yaml
@@ -0,0 +1,40 @@
+Apache-2.0:
+  - apache.org/licenses/license-2.0
+  - apache.org/licenses/license-2.0.txt
+  - apache.org/license/license-2.0
+  - apache.org/license/license-2.0.txt
+  - www.apache.org/licenses/license-2.0.txt
+  - opensource.org/licenses/apache-2.0
+  - www.apache.org/license/LICENSE-2.0.txt
+
+MIT:
+  - bouncycastle.org/licence
+  - opensource.org/licenses/mit-license.php
+  - opensource.org/licenses/mit
+  - www.bouncycastle.org/licence.html
+  - www.opensource.org/licenses/mit-license.php
+
+LGPL-2.1:
+  - gnu.org/licenses/old-licenses/lgpl-2.1
+  - www.gnu.org/licenses/old-licenses/lgpl-2.1.html
+  - www.gnu.org/licenses/lgpl-2.1.html
+
+GPL-2.0-with-classpath-exception:
+  - gnu.org/software/classpath/license
+  - www.gnu.org/software/classpath/license.html
+
+EPL-1.0:
+  - eclipse.org/legal/epl-v10
+  - www.eclipse.org/legal/epl-v10.html
+
+BSD-2-Clause:
+  - opensource.org/licenses/BSD-2-Clause
+
+BSD-3-Clause:
+  - opensource.org/licenses/BSD-3-Clause
+
+CC0-1.0:
+  - creativecommons.org/publicdomain/zero/1.0/
+
+MPL-1.1:
+  - www.mozilla.org/MPL/MPL-1.1.html
diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go
index 7483005..842be06 100644
--- a/pkg/deps/golang.go
+++ b/pkg/deps/golang.go
@@ -27,6 +27,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"regexp"
+	"strings"
 
 	"github.com/apache/skywalking-eyes/internal/logger"
 	"github.com/apache/skywalking-eyes/pkg/license"
@@ -86,13 +87,15 @@ func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, confi
 	for _, module := range modules {
 		func() {
 			for _, l := range config.Licenses {
-				if l.Name == module.Path && l.Version == module.Version {
-					report.Resolve(&Result{
-						Dependency:    module.Path,
-						LicenseSpdxID: l.License,
-						Version:       module.Version,
-					})
-					return
+				for _, version := range strings.Split(l.Version, ",") {
+					if l.Name == module.Path && version == module.Version {
+						report.Resolve(&Result{
+							Dependency:    module.Path,
+							LicenseSpdxID: l.License,
+							Version:       module.Version,
+						})
+						return
+					}
 				}
 			}
 			err := resolver.ResolvePackageLicense(config, module, report)
diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go
index 3c56188..1e0c774 100644
--- a/pkg/deps/jar.go
+++ b/pkg/deps/jar.go
@@ -29,31 +29,39 @@ import (
 
 	"github.com/apache/skywalking-eyes/internal/logger"
 	"github.com/apache/skywalking-eyes/pkg/license"
+	"github.com/bmatcuk/doublestar/v2"
 )
 
 type JarResolver struct{}
 
-func (resolver *JarResolver) CanResolve(jarFile string) bool {
-	return filepath.Ext(jarFile) == ".jar"
+func (resolver *JarResolver) CanResolve(jarFiles string) bool {
+	return strings.HasSuffix(jarFiles, ".jar")
 }
 
-func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, report *Report) error {
-	state := NotFound
-	if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil {
-		dep := filepath.Base(jarFile)
-		logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String())
-		report.Skip(&Result{
-			Dependency:    dep,
-			LicenseSpdxID: Unknown,
-		})
+func (resolver *JarResolver) Resolve(jarFiles string, config *ConfigDeps, report *Report) error {
+	fs, err := doublestar.Glob(jarFiles)
+	if err != nil {
+		return err
+	}
+	for _, jarFile := range fs {
+		state := NotFound
+		if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil {
+			dep := filepath.Base(jarFile)
+			logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String())
+			report.Skip(&Result{
+				Dependency:    dep,
+				LicenseSpdxID: Unknown,
+			})
+		}
 	}
-
 	return nil
 }
 
 func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFile, version string, report *Report) error {
 	dep := filepath.Base(jarFile)
 
+	logger.Log.Debugf("Resolving the license of <%s> from jar\n", dep)
+
 	compressedJar, err := zip.OpenReader(jarFile)
 	if err != nil {
 		return err
@@ -66,10 +74,7 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil
 	for _, compressedFile := range compressedJar.File {
 		archiveFile := compressedFile.Name
 		switch {
-		case reHaveManifestFile.MatchString(archiveFile):
-			manifestFile = compressedFile
-
-		case possibleLicenseFileName.MatchString(archiveFile):
+		case reMaybeLicense.MatchString(archiveFile):
 			*state |= FoundLicenseInJarLicenseFile
 			buf, err := resolver.ReadFileFromZip(compressedFile)
 			if err != nil {
@@ -77,6 +82,8 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil
 			}
 
 			return resolver.IdentifyLicense(config, jarFile, dep, buf.String(), version, report)
+		case reHaveManifestFile.MatchString(archiveFile):
+			manifestFile = compressedFile
 		}
 	}
 
@@ -90,11 +97,15 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil
 
 		r := reSearchLicenseInManifestFile.FindStringSubmatch(content)
 		if len(r) != 0 {
+			lcs := strings.TrimSpace(r[1])
+			if l, err := license.Identify(lcs, config.Threshold); err == nil {
+				lcs = l
+			}
 			report.Resolve(&Result{
 				Dependency:      dep,
 				LicenseFilePath: jarFile,
 				LicenseContent:  strings.TrimSpace(r[1]),
-				LicenseSpdxID:   strings.TrimSpace(r[1]),
+				LicenseSpdxID:   lcs,
 				Version:         version,
 			})
 			return nil
@@ -123,16 +134,21 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile *zip.File) (*bytes.Buff
 }
 
 func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, content, version string, report *Report) error {
-	identifier, err := license.Identify(content, config.Threshold)
-	if err != nil {
-		return err
+	contents := strings.Split(content, "[, \\s]+")
+	identifiers := make([]string, 0, len(contents))
+	for _, c := range contents {
+		identifier, err := license.Identify(c, config.Threshold)
+		if err != nil {
+			return err
+		}
+		identifiers = append(identifiers, identifier)
 	}
 
 	report.Resolve(&Result{
 		Dependency:      dep,
 		LicenseFilePath: path,
 		LicenseContent:  content,
-		LicenseSpdxID:   identifier,
+		LicenseSpdxID:   strings.Join(identifiers, " and "),
 		Version:         version,
 	})
 	return nil
diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go
index 23c6324..abd869f 100644
--- a/pkg/deps/maven.go
+++ b/pkg/deps/maven.go
@@ -32,6 +32,7 @@ import (
 	"golang.org/x/net/html/charset"
 
 	"github.com/apache/skywalking-eyes/internal/logger"
+	"github.com/apache/skywalking-eyes/pkg/license"
 )
 
 type MavenPomResolver struct {
@@ -131,7 +132,7 @@ func (resolver *MavenPomResolver) LoadDependencies() ([]*Dependency, error) {
 	cmd.Stdout = bufio.NewWriter(buf)
 	cmd.Stderr = os.Stderr
 
-	logger.Log.Debugf("Run command: 「%v」, please wait", cmd.String())
+	logger.Log.Debugf("Running command: [%v], please wait", cmd.String())
 	err := cmd.Run()
 	if err != nil {
 		return nil, err
@@ -146,13 +147,15 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, config
 	for _, dep := range deps {
 		func() {
 			for _, l := range config.Licenses {
-				if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, dep.ArtifactID) && l.Version == dep.Version {
-					report.Resolve(&Result{
-						Dependency:    dep.Jar(),
-						LicenseSpdxID: l.License,
-						Version:       dep.Version,
-					})
-					return
+				for _, version := range strings.Split(l.Version, ",") {
+					if l.Name == fmt.Sprintf("%s:%s", strings.Join(dep.GroupID, "."), dep.ArtifactID) && version == dep.Version {
+						report.Resolve(&Result{
+							Dependency:    dep.Jar(),
+							LicenseSpdxID: l.License,
+							Version:       dep.Version,
+						})
+						return
+					}
 				}
 			}
 			state := NotFound
@@ -187,12 +190,14 @@ func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps, stat
 	pom, err := resolver.ReadLicensesFromPom(pomFile)
 	if err != nil {
 		return err
-	} else if pom != nil && len(pom.Licenses) != 0 {
+	}
+
+	if pom != nil && len(pom.Licenses) != 0 {
 		report.Resolve(&Result{
 			Dependency:      dep.Jar(),
 			LicenseFilePath: pomFile,
 			LicenseContent:  pom.Raw(),
-			LicenseSpdxID:   pom.AllLicenses(),
+			LicenseSpdxID:   pom.AllLicenses(config),
 			Version:         dep.Version,
 		})
 
@@ -266,7 +271,7 @@ loop:
 }
 
 var (
-	reMaybeLicense                = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying`)
+	reMaybeLicense                = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying$`)
 	reHaveManifestFile            = regexp.MustCompile(`(?i)^(\S*/)?manifest\.MF$`)
 	reSearchLicenseInManifestFile = regexp.MustCompile(`(?im)^.*?licen[cs]e.*?(http.+)`)
 )
@@ -310,18 +315,18 @@ func LoadDependenciesTree(data []byte) []*Dependency {
 	stack := []Elem{}
 	unique := make(map[string]struct{})
 
-	reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) (\b.+):(\b.+):(\b.+):(\b.+):(\b.+)$`)
+	reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) (?P<gid>\b.+?):(?P<aid>\b.+?):(?P<packaging>\b.+)(:\b.+)?:(?P<version>\b.+):(?P<scope>\b.+?)$`)
 	rawDeps := reFind.FindAllSubmatch(data, -1)
 
 	deps := make([]*Dependency, 0, len(rawDeps))
 	for _, rawDep := range rawDeps {
-		gid := strings.Split(string(rawDep[3]), ".")
+		gid := strings.Split(string(rawDep[reFind.SubexpIndex("gid")]), ".")
 		dep := &Dependency{
 			GroupID:    gid,
-			ArtifactID: string(rawDep[4]),
-			Packaging:  string(rawDep[5]),
-			Version:    string(rawDep[6]),
-			Scope:      string(rawDep[7]),
+			ArtifactID: string(rawDep[reFind.SubexpIndex("aid")]),
+			Packaging:  string(rawDep[reFind.SubexpIndex("packaging")]),
+			Version:    string(rawDep[reFind.SubexpIndex("version")]),
+			Scope:      string(rawDep[reFind.SubexpIndex("scope")]),
 		}
 
 		if _, have := unique[dep.Path()]; have {
@@ -391,7 +396,7 @@ func (s *State) String() string {
 		m = append(m, "failed to resolve license content from manifest file found in jar")
 	}
 
-	return strings.Join(m, "|")
+	return strings.Join(m, " | ")
 }
 
 type Dependency struct {
@@ -457,12 +462,12 @@ type PomFile struct {
 }
 
 // AllLicenses return all licenses found in pom.xml file
-func (pom *PomFile) AllLicenses() string {
+func (pom *PomFile) AllLicenses(config *ConfigDeps) string {
 	licenses := []string{}
 	for _, l := range pom.Licenses {
-		licenses = append(licenses, l.Item())
+		licenses = append(licenses, l.Item(config))
 	}
-	return strings.Join(licenses, ", ")
+	return strings.Join(licenses, " and ")
 }
 
 // Raw return raw data
@@ -481,14 +486,23 @@ type XMLLicense struct {
 	Comments     string `xml:"comments,omitempty"`
 }
 
-func (l *XMLLicense) Item() string {
-	return GetLicenseFromURL(l.URL)
+func (l *XMLLicense) Item(config *ConfigDeps) string {
+	if l.URL != "" {
+		return GetLicenseFromURL(l.URL, config)
+	}
+	if l.Name != "" {
+		return l.Name
+	}
+	return l.URL
 }
 
 func (l *XMLLicense) Raw() string {
 	return fmt.Sprintf(`License: {Name: %s, URL: %s, Distribution: %s, Comments: %s, }`, l.Name, l.URL, l.Distribution, l.Comments)
 }
 
-func GetLicenseFromURL(url string) string {
+func GetLicenseFromURL(url string, config *ConfigDeps) string {
+	if l, err := license.Identify(url, config.Threshold); err == nil {
+		return l
+	}
 	return url
 }
diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go
index 3fc1a0c..4cdb28f 100644
--- a/pkg/deps/npm.go
+++ b/pkg/deps/npm.go
@@ -212,9 +212,11 @@ func (resolver *NpmResolver) ResolvePkgFile(result *Result, pkgPath string, lice
 
 	result.Version = packageInfo.Version
 	for _, l := range licenses {
-		if l.Name == packageInfo.Name && l.Version == packageInfo.Version {
-			result.LicenseSpdxID = l.License
-			return nil
+		for _, version := range strings.Split(l.Version, ",") {
+			if l.Name == packageInfo.Name && version == packageInfo.Version {
+				result.LicenseSpdxID = l.License
+				return nil
+			}
 		}
 	}
 
@@ -286,9 +288,11 @@ func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, conf
 			return nil
 		}
 		for _, l := range config.Licenses {
-			if l.Name == info.Name() && l.Version == result.Version {
-				result.LicenseSpdxID = l.License
-				return nil
+			for _, version := range strings.Split(l.Version, ",") {
+				if l.Name == info.Name() && version == result.Version {
+					result.LicenseSpdxID = l.License
+					return nil
+				}
 			}
 		}
 		identifier, err := license.Identify(string(content), config.Threshold)
diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go
index b48eb90..8769d79 100644
--- a/pkg/deps/resolve.go
+++ b/pkg/deps/resolve.go
@@ -30,6 +30,7 @@ var Resolvers = []Resolver{
 	new(GoModResolver),
 	new(NpmResolver),
 	new(MavenPomResolver),
+	new(JarResolver),
 }
 
 func Resolve(config *ConfigDeps, report *Report) error {
diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go
index 5c42cf3..d12376b 100644
--- a/pkg/license/identifier.go
+++ b/pkg/license/identifier.go
@@ -24,6 +24,7 @@ import (
 	"sync"
 
 	"github.com/google/licensecheck"
+	"gopkg.in/yaml.v3"
 
 	"github.com/apache/skywalking-eyes/assets"
 	"github.com/apache/skywalking-eyes/internal/logger"
@@ -40,8 +41,24 @@ var (
 // It will be initialized once.
 func scanner() *licensecheck.Scanner {
 	scannerOnce.Do(func() {
+		licenses := licensecheck.BuiltinLicenses()
+		if bs, err := assets.Asset("urls.yaml"); err == nil {
+			licenseURLs := make(map[string][]string)
+			if err := yaml.Unmarshal(bs, &licenseURLs); err == nil {
+				logger.Log.Debug("license URLs:", licenseURLs)
+				for id, urls := range licenseURLs {
+					for _, url := range urls {
+						licenses = append(licenses, licensecheck.License{
+							ID:   id,
+							URL:  strings.ToLower(url),
+							Type: licensecheck.Unknown,
+						})
+					}
+				}
+			}
+		}
 		var err error
-		_scanner, err = licensecheck.NewScanner(licensecheck.BuiltinLicenses())
+		_scanner, err = licensecheck.NewScanner(licenses)
 		if err != nil {
 			logger.Log.Fatalf("licensecheck.NewScanner: %v", err)
 		}
@@ -57,10 +74,17 @@ func Identify(content string, threshold int) (string, error) {
 		return "", fmt.Errorf("cannot identify the license, coverage: %.1f%%", coverage.Percent)
 	}
 
+	seen := make(map[string]bool)
+
 	var sb strings.Builder
 	sb.WriteString(coverage.Match[0].ID)
+	seen[coverage.Match[0].ID] = true
 
 	for i := 1; i < len(coverage.Match); i++ {
+		if seen[coverage.Match[i].ID] {
+			continue
+		}
+		seen[coverage.Match[i].ID] = true
 		sb.WriteString(" and ")
 		sb.WriteString(coverage.Match[i].ID)
 	}
diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go
index ae5b666..7ac706f 100644
--- a/pkg/license/identifier_test.go
+++ b/pkg/license/identifier_test.go
@@ -323,6 +323,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 `,
 			want: "BSD-3-Clause",
 		},
+		{
+			name:    "URL http://www.apache.org/licenses/LICENSE-2.0",
+			content: "http://www.apache.org/licenses/LICENSE-2.0",
+			want:    "Apache-2.0",
+		},
+		{
+			name:    "URL https://opensource.org/licenses/Apache-2.0",
+			content: "https://opensource.org/licenses/Apache-2.0",
+			want:    "Apache-2.0",
+		},
+		{
+			name:    "URL http://www.apache.org/licenses/LICENSE-2.0.txt",
+			content: "http://www.apache.org/licenses/LICENSE-2.0.txt",
+			want:    "Apache-2.0",
+		},
+		{
+			name:    "URL https://www.bouncycastle.org/licence.html",
+			content: "https://www.bouncycastle.org/licence.html",
+			want:    "MIT",
+		},
+		{
+			name:    "URL https://www.bouncycastle.org/licence.html",
+			content: "http://www.gnu.org/software/classpath/license.html, http://opensource.org/licenses/MIT",
+			want:    "GPL-2.0-with-classpath-exception and MIT",
+		},
+		{
+			name:    "URL",
+			content: "http://www.mozilla.org/MPL/MPL-1.1.html, http://www.gnu.org/licenses/lgpl-2.1.html",
+			want:    "MPL-1.1 and LGPL-2.1",
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {