You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@skywalking.apache.org by ke...@apache.org on 2020/12/23 05:08:58 UTC

[skywalking-eyes] branch refactor created (now e194b68)

This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a change to branch refactor
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git.


      at e194b68  Refactor the normalizers to increase accuracy and minimise the use of pattern

This branch includes the following new commits:

     new e194b68  Refactor the normalizers to increase accuracy and minimise the use of pattern

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[skywalking-eyes] 01/01: Refactor the normalizers to increase accuracy and minimise the use of pattern

Posted by ke...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch refactor
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit e194b68d89bea1b7722833e270ad90e35980cd0c
Author: kezhenxu94 <ke...@apache.org>
AuthorDate: Wed Dec 23 13:08:41 2020 +0800

    Refactor the normalizers to increase accuracy and minimise the use of pattern
---
 .golangci.yml                          |  37 ++++-
 .licenserc.yaml                        |   4 +-
 license-eye/commands/header/check.go   |   3 +-
 license-eye/commands/header/fix.go     |   3 +-
 license-eye/internal/logger/log.go     |   2 +-
 license-eye/pkg/header/check.go        |  40 ++---
 license-eye/pkg/header/check_test.go   |   9 +-
 license-eye/pkg/header/config.go       |  50 ++-----
 license-eye/pkg/header/fix.go          |   7 +-
 license-eye/pkg/license/norm.go        | 171 ++++++++++++++++++++++
 license-eye/pkg/license/norm_test.go   | 260 +++++++++++++++++++++++++++++++++
 license-eye/pkg/{header => }/result.go |   2 +-
 12 files changed, 506 insertions(+), 82 deletions(-)

diff --git a/.golangci.yml b/.golangci.yml
index f4a7795..f4a6fee 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -16,6 +16,9 @@
 # under the License.
 # 
 
+run:
+  tests: false
+
 linters-settings:
   govet:
     check-shadowing: true
@@ -37,6 +40,38 @@ linters-settings:
       fmt: "logging is allowed only by logutils.Log"
   misspell:
     locale: US
+    ignore-words:
+      - analogue
+      - analyse
+      - artefact
+      - authorised
+      - calibre
+      - cancelled
+      - catalogue
+      - categorise
+      - centre
+      - emphasised
+      - favour
+      - favourite
+      - fulfil
+      - fulfilment
+      - initialise
+      - labelling
+      - labour
+      - licence
+      - maximise
+      - modelled
+      - modelling
+      - offence
+      - optimise
+      - organisation
+      - organise
+      - practise
+      - programme
+      - realise
+      - recognise
+      - signalling
+      - utilisation
   lll:
     line-length: 150
   goimports:
@@ -51,7 +86,7 @@ linters-settings:
     disabled-checks:
       - ifElseChain
   funlen:
-    lines: 150
+    lines: 100
     statements: 50
   whitespace:
     multi-if: false
diff --git a/.licenserc.yaml b/.licenserc.yaml
index e4494c7..819f2b1 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -38,10 +38,10 @@ header: # `header` section is configurations for source codes license header.
   # `pattern` is optional regexp if all the file headers are the same as `license` (linebreaks doesn't matter);
   # In the `pattern`, all punctuations should be removed unless they are part of the regex;
   pattern: |
-    Licensed to( the)? Apache Software Foundation \(ASF\) under one or more contributor
+    Licensed to the Apache Software Foundation under one or more contributor
     license agreements. See the NOTICE file distributed with
     this work for additional information regarding copyright
-    ownership. (Apache Software Foundation \(ASF\)|The ASF) licenses this file to you under
+    ownership. The Apache Software Foundation licenses this file to you under
     the Apache License, Version 2.0 \(the "License"\); you may
     not use this file except in compliance with the License.
     You may obtain a copy of the License at
diff --git a/license-eye/commands/header/check.go b/license-eye/commands/header/check.go
index c349ace..0578286 100644
--- a/license-eye/commands/header/check.go
+++ b/license-eye/commands/header/check.go
@@ -19,6 +19,7 @@ package header
 
 import (
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/config"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/header"
 
@@ -31,7 +32,7 @@ var CheckCommand = &cobra.Command{
 	Long:    "check command walks the specified paths recursively and checks if the specified files have the license header in the config file.",
 	RunE: func(cmd *cobra.Command, args []string) error {
 		var config config.Config
-		var result header.Result
+		var result pkg.Result
 
 		if err := config.Parse(cfgFile); err != nil {
 			return err
diff --git a/license-eye/commands/header/fix.go b/license-eye/commands/header/fix.go
index b65069f..4fbba48 100644
--- a/license-eye/commands/header/fix.go
+++ b/license-eye/commands/header/fix.go
@@ -23,6 +23,7 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/config"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/header"
 	"github.com/spf13/cobra"
@@ -34,7 +35,7 @@ var FixCommand = &cobra.Command{
 	Long:    "fix command walks the specified paths recursively and fix the license header if the specified files don't have the license header.",
 	RunE: func(cmd *cobra.Command, args []string) error {
 		var config config.Config
-		var result header.Result
+		var result pkg.Result
 
 		if err := config.Parse(cfgFile); err != nil {
 			return err
diff --git a/license-eye/internal/logger/log.go b/license-eye/internal/logger/log.go
index 1c641e8..7fc102e 100644
--- a/license-eye/internal/logger/log.go
+++ b/license-eye/internal/logger/log.go
@@ -29,7 +29,7 @@ func init() {
 	if Log == nil {
 		Log = logrus.New()
 	}
-	Log.Level = logrus.InfoLevel
+	Log.Level = logrus.DebugLevel
 	Log.SetOutput(os.Stdout)
 	Log.SetFormatter(&logrus.TextFormatter{
 		DisableTimestamp:       true,
diff --git a/license-eye/pkg/header/check.go b/license-eye/pkg/header/check.go
index 8053cd3..c32279b 100644
--- a/license-eye/pkg/header/check.go
+++ b/license-eye/pkg/header/check.go
@@ -18,7 +18,6 @@
 package header
 
 import (
-	"bufio"
 	"io/ioutil"
 	"net/http"
 	"os"
@@ -27,20 +26,20 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
+	lcs "github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
 	"github.com/bmatcuk/doublestar/v2"
 )
 
-// TODO: also trim stop words
 var (
 	// LicenseLocationThreshold specifies the index threshold where the license header can be located,
 	// after all, a "header" cannot be TOO far from the file start.
 	LicenseLocationThreshold = 80
-	Punctuations             = regexp.MustCompile("[\\[\\]/*:;\\s#\\-!~'\"(){}?]+")
 )
 
 // Check checks the license headers of the specified paths/globs.
-func Check(config *ConfigHeader, result *Result) error {
+func Check(config *ConfigHeader, result *pkg.Result) error {
 	for _, pattern := range config.Paths {
 		if err := checkPattern(pattern, result, config); err != nil {
 			return err
@@ -52,7 +51,7 @@ func Check(config *ConfigHeader, result *Result) error {
 
 var seen = make(map[string]bool)
 
-func checkPattern(pattern string, result *Result, config *ConfigHeader) error {
+func checkPattern(pattern string, result *pkg.Result, config *ConfigHeader) error {
 	paths, err := doublestar.Glob(pattern)
 
 	if err != nil {
@@ -73,7 +72,7 @@ func checkPattern(pattern string, result *Result, config *ConfigHeader) error {
 	return nil
 }
 
-func checkPath(path string, result *Result, config *ConfigHeader) error {
+func checkPath(path string, result *pkg.Result, config *ConfigHeader) error {
 	defer func() { seen[path] = true }()
 
 	if yes, err := config.ShouldIgnore(path); yes || seen[path] || err != nil {
@@ -106,7 +105,7 @@ func checkPath(path string, result *Result, config *ConfigHeader) error {
 }
 
 // CheckFile checks whether or not the file contains the configured license header.
-func CheckFile(file string, config *ConfigHeader, result *Result) error {
+func CheckFile(file string, config *ConfigHeader, result *pkg.Result) error {
 	if yes, err := config.ShouldIgnore(file); yes || err != nil {
 		if !seen[file] {
 			result.Ignore(file)
@@ -116,14 +115,6 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 
 	logger.Log.Debugln("Checking file:", file)
 
-	reader, err := os.Open(file)
-
-	if err != nil {
-		return err
-	}
-
-	var lines []string
-
 	bs, err := ioutil.ReadFile(file)
 	if err != nil {
 		return err
@@ -133,24 +124,13 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 		return nil
 	}
 
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		line := strings.ToLower(Punctuations.ReplaceAllString(scanner.Text(), " "))
-		if len(line) > 0 {
-			lines = append(lines, line)
-		}
-	}
+	content := lcs.NormalizeHeader(string(bs))
+	expected, pattern := config.NormalizedLicense(), config.NormalizedPattern()
 
-	content := Punctuations.ReplaceAllString(strings.Join(lines, " "), " ")
-	license, pattern := config.NormalizedLicense(), config.NormalizedPattern()
-
-	if satisfy(content, license, pattern) {
+	if satisfy(content, expected, pattern) {
 		result.Succeed(file)
 	} else {
 		logger.Log.Debugln("Content is:", content)
-		if pattern != nil {
-			logger.Log.Debugln("Pattern is:", pattern)
-		}
 
 		result.Fail(file)
 	}
@@ -159,7 +139,7 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 }
 
 func satisfy(content, license string, pattern *regexp.Regexp) bool {
-	if index := strings.Index(content, license); index >= 0 {
+	if index := strings.Index(content, license); license != "" && index >= 0 {
 		return index < LicenseLocationThreshold
 	}
 
diff --git a/license-eye/pkg/header/check_test.go b/license-eye/pkg/header/check_test.go
index 2cc2de7..ca8e1bb 100644
--- a/license-eye/pkg/header/check_test.go
+++ b/license-eye/pkg/header/check_test.go
@@ -24,6 +24,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"gopkg.in/yaml.v3"
 )
 
@@ -48,7 +49,7 @@ func TestCheckFile(t *testing.T) {
 	type args struct {
 		name       string
 		file       string
-		result     *Result
+		result     *pkg.Result
 		wantErr    bool
 		hasFailure bool
 	}
@@ -64,7 +65,7 @@ func TestCheckFile(t *testing.T) {
 			cases = append(cases, args{
 				name:       file,
 				file:       file,
-				result:     &Result{},
+				result:     &pkg.Result{},
 				wantErr:    false,
 				hasFailure: false,
 			})
@@ -99,7 +100,7 @@ func TestCheckFileFailure(t *testing.T) {
 	type args struct {
 		name       string
 		file       string
-		result     *Result
+		result     *pkg.Result
 		wantErr    bool
 		hasFailure bool
 	}
@@ -115,7 +116,7 @@ func TestCheckFileFailure(t *testing.T) {
 			cases = append(cases, args{
 				name:       file,
 				file:       file,
-				result:     &Result{},
+				result:     &pkg.Result{},
 				wantErr:    false,
 				hasFailure: true,
 			})
diff --git a/license-eye/pkg/header/config.go b/license-eye/pkg/header/config.go
index 8911cfc..d303dd4 100644
--- a/license-eye/pkg/header/config.go
+++ b/license-eye/pkg/header/config.go
@@ -19,15 +19,14 @@ package header
 
 import (
 	"bufio"
-	"io/ioutil"
 	"os"
 	"regexp"
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
 	"github.com/bmatcuk/doublestar/v2"
-	"gopkg.in/yaml.v3"
 )
 
 type ConfigHeader struct {
@@ -40,47 +39,19 @@ type ConfigHeader struct {
 // NormalizedLicense returns the normalized string of the license content,
 // "normalized" means the linebreaks and Punctuations are all trimmed.
 func (config *ConfigHeader) NormalizedLicense() string {
-	var lines []string
-	for _, line := range strings.Split(config.License, "\n") {
-		if len(line) > 0 {
-			lines = append(lines, Punctuations.ReplaceAllString(line, " "))
-		}
-	}
-	return strings.ToLower(regexp.MustCompile("(?m)[\\s\"']+").ReplaceAllString(strings.Join(lines, " "), " "))
+	return license.Normalize(config.License)
 }
 
 func (config *ConfigHeader) NormalizedPattern() *regexp.Regexp {
-	if config.Pattern == "" || strings.TrimSpace(config.Pattern) == "" {
-		return nil
-	}
+	pattern := config.Pattern
 
-	var lines []string
-	for _, line := range strings.Split(config.Pattern, "\n") {
-		if len(line) > 0 {
-			lines = append(lines, line)
-		}
-	}
-	content := regexp.MustCompile("(?m)[\\s\"':;/\\-]+").ReplaceAllString(strings.Join(lines, " "), " ")
-	return regexp.MustCompile("(?i).*" + content + ".*")
-}
-
-// Parse reads and parses the header check configurations in config file.
-func (config *ConfigHeader) Parse(file string) error {
-	logger.Log.Infoln("Loading configuration from file:", file)
-
-	if bytes, err := ioutil.ReadFile(file); err != nil {
-		return err
-	} else if err := yaml.Unmarshal(bytes, config); err != nil {
-		return err
+	if pattern == "" || strings.TrimSpace(pattern) == "" {
+		return nil
 	}
 
-	logger.Log.Debugln("License header is:", config.NormalizedLicense())
+	pattern = license.NormalizePattern(pattern)
 
-	if len(config.Paths) == 0 {
-		config.Paths = []string{"**"}
-	}
-
-	return nil
+	return regexp.MustCompile("(?i).*" + pattern + ".*")
 }
 
 func (config *ConfigHeader) ShouldIgnore(path string) (bool, error) {
@@ -103,8 +74,6 @@ func (config *ConfigHeader) ShouldIgnore(path string) (bool, error) {
 }
 
 func (config *ConfigHeader) Finalize() error {
-	logger.Log.Debugln("License header is:", config.NormalizedLicense())
-
 	if len(config.Paths) == 0 {
 		config.Paths = []string{"**"}
 	}
@@ -124,5 +93,10 @@ func (config *ConfigHeader) Finalize() error {
 		}
 	}
 
+	logger.Log.Debugln("License header is:", config.NormalizedLicense())
+	if p := config.NormalizedPattern(); p != nil {
+		logger.Log.Debugln("Pattern is:", p)
+	}
+
 	return nil
 }
diff --git a/license-eye/pkg/header/fix.go b/license-eye/pkg/header/fix.go
index 40acde7..203de06 100644
--- a/license-eye/pkg/header/fix.go
+++ b/license-eye/pkg/header/fix.go
@@ -26,12 +26,13 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/comments"
 )
 
 // Fix adds the configured license header to the given file.
-func Fix(file string, config *ConfigHeader, result *Result) error {
-	var r Result
+func Fix(file string, config *ConfigHeader, result *pkg.Result) error {
+	var r pkg.Result
 	if err := CheckFile(file, config, &r); err != nil || !r.HasFailure() {
 		logger.Log.Warnln("Try to fix a valid file, do nothing:", file)
 		return err
@@ -50,7 +51,7 @@ func Fix(file string, config *ConfigHeader, result *Result) error {
 	return nil
 }
 
-func InsertComment(file string, style *comments.CommentStyle, config *ConfigHeader, result *Result) error {
+func InsertComment(file string, style *comments.CommentStyle, config *ConfigHeader, result *pkg.Result) error {
 	stat, err := os.Stat(file)
 	if err != nil {
 		return err
diff --git a/license-eye/pkg/license/norm.go b/license-eye/pkg/license/norm.go
new file mode 100644
index 0000000..771f2b2
--- /dev/null
+++ b/license-eye/pkg/license/norm.go
@@ -0,0 +1,171 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import (
+	"regexp"
+	"strings"
+)
+
+type Normalizer func(string) string
+
+var (
+	// normalizers is the list of Normalizers that can be applied to the license text without changing the
+	// license's meaning, according to the matching guidelines at https://spdx.dev/license-list/matching-guidelines.
+	// The order matters.
+	normalizers = []Normalizer{
+		OneLineNormalizer,
+		FlattenSpaceNormalizer,
+		SubstantiveTextsNormalizer,
+		strings.ToLower,
+		strings.TrimSpace,
+	}
+
+	// 6. Code Comment Indicators (https://spdx.dev/license-list/matching-guidelines).
+	commentIndicators = []*regexp.Regexp{
+		regexp.MustCompile(`(?m)^\s*#+`),    // #
+		regexp.MustCompile(`(?m)^\s*//+`),   // //
+		regexp.MustCompile(`(?m)^\s*"""+`),  // """
+		regexp.MustCompile(`(?m)^\s*\(\*+`), // (*
+
+		regexp.MustCompile(`(?m)^\s*/\*+`), // /*
+		regexp.MustCompile(`(?m)^\s*\*+/`), //  */
+		regexp.MustCompile(`(?m)^\s*\*+`),  //  *
+
+		regexp.MustCompile(`(?m)^\s*<!--+`), // <!--
+		regexp.MustCompile(`(?m)^\s*--+>`),  // -->
+		regexp.MustCompile(`(?m)^\s*--+`),   // --
+		regexp.MustCompile(`(?m)^\s*~+`),    //   ~
+
+		regexp.MustCompile(`(?m)^\s*{-+`), // {-
+		regexp.MustCompile(`(?m)^\s*-}+`), // -}
+
+		regexp.MustCompile(`(?m)^\s*::`),   // ::
+		regexp.MustCompile(`(?m)^\s*@REM`), // @REM
+	}
+
+	flattenSpace = regexp.MustCompile(`\s+`)
+
+	substitutableTexts = []struct {
+		regex       *regexp.Regexp
+		replacement string
+	}{
+		{regexp.MustCompile(`(?i)\backnowledgement\b`), "acknowledgment"},
+		{regexp.MustCompile(`(?i)\banalog\b`), "analogue"},
+		{regexp.MustCompile(`(?i)\banalyze\b`), "analyse"},
+		{regexp.MustCompile(`(?i)\bartifact\b`), "artefact"},
+		{regexp.MustCompile(`(?i)\bauthorization\b`), "authorisation"},
+		{regexp.MustCompile(`(?i)\bauthorized\b`), "authorised"},
+		{regexp.MustCompile(`(?i)\bcaliber\b`), "calibre"},
+		{regexp.MustCompile(`(?i)\bcanceled\b`), "cancelled"},
+		{regexp.MustCompile(`(?i)\bcapitalizations\b`), "capitalisations"},
+		{regexp.MustCompile(`(?i)\bcatalog\b`), "catalogue"},
+		{regexp.MustCompile(`(?i)\bcategorize\b`), "categorise"},
+		{regexp.MustCompile(`(?i)\bcenter\b`), "centre"},
+		{regexp.MustCompile(`(?i)\bcopyright holder\b`), "copyright owner"},
+		{regexp.MustCompile(`(?i)\bemphasized\b`), "emphasised"},
+		{regexp.MustCompile(`(?i)\bfavor\b`), "favour"},
+		{regexp.MustCompile(`(?i)\bfavorite\b`), "favourite"},
+		{regexp.MustCompile(`(?i)\bfulfill\b`), "fulfil"},
+		{regexp.MustCompile(`(?i)\bfulfillment\b`), "fulfilment"},
+		{regexp.MustCompile(`(?i)\binitialize\b`), "initialise"},
+		{regexp.MustCompile(`(?i)\bjudgement\b`), "judgment"},
+		{regexp.MustCompile(`(?i)\blabeling\b`), "labelling"},
+		{regexp.MustCompile(`(?i)\blabor\b`), "labour"},
+		{regexp.MustCompile(`(?i)\blicense\b`), "licence"},
+		{regexp.MustCompile(`(?i)\bmaximize\b`), "maximise"},
+		{regexp.MustCompile(`(?i)\bmodeled\b`), "modelled"},
+		{regexp.MustCompile(`(?i)\bmodeling\b`), "modelling"},
+		{regexp.MustCompile(`(?i)\bnoncommercial\b`), "non-commercial"},
+		{regexp.MustCompile(`(?i)\boffense\b`), "offence"},
+		{regexp.MustCompile(`(?i)\boptimize\b`), "optimise"},
+		{regexp.MustCompile(`(?i)\borganization\b`), "organisation"},
+		{regexp.MustCompile(`(?i)\borganize\b`), "organise"},
+		{regexp.MustCompile(`(?i)\bpercent\b`), "per cent"},
+		{regexp.MustCompile(`(?i)\bpractice\b`), "practise"},
+		{regexp.MustCompile(`(?i)\bprogram\b`), "programme"},
+		{regexp.MustCompile(`(?i)\brealize\b`), "realise"},
+		{regexp.MustCompile(`(?i)\brecognize\b`), "recognise"},
+		{regexp.MustCompile(`(?i)\bsignaling\b`), "signalling"},
+		{regexp.MustCompile(`(?i)\bsublicense\b`), "sub-license"},
+		{regexp.MustCompile(`(?i)\bsub-license\b`), "sub license"},
+		{regexp.MustCompile(`(?i)\bsublicense\b`), "sub license"},
+		{regexp.MustCompile(`(?i)\butilization\b`), "utilisation"},
+		{regexp.MustCompile(`(?i)\bwhile\b`), "whilst"},
+		{regexp.MustCompile(`(?i)\bwilfull\b`), "wilful"},
+
+		{regexp.MustCompile(`©`), "Copyright "},
+		{regexp.MustCompile(`\(c\)`), "Copyright "},
+		{regexp.MustCompile(`\bhttps://`), "http://"},
+
+		{regexp.MustCompile(`(?i)\b(the )?Apache Software Foundation( \(ASF\))?`), "the ASF"},
+	}
+)
+
+// NormalizePattern applies a chain of Normalizers to the license pattern to make it cleaner for identification.
+func NormalizePattern(pattern string) string {
+	for _, normalize := range normalizers {
+		pattern = normalize(pattern)
+	}
+	return pattern
+}
+
+// NormalizeHeader applies a chain of Normalizers to the file header to make it cleaner for identification.
+func NormalizeHeader(header string) string {
+	ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...)
+	for _, normalize := range ns {
+		header = normalize(header)
+	}
+	return header
+}
+
+// Normalize applies a chain of Normalizers to the license text to make it cleaner for identification.
+func Normalize(license string) string {
+	for _, normalize := range normalizers {
+		license = normalize(license)
+	}
+	return license
+}
+
+// OneLineNormalizer simply removes all line breaks to flatten the license text into one line.
+func OneLineNormalizer(text string) string {
+	return regexp.MustCompile("[\n\r]+").ReplaceAllString(text, " ")
+}
+
+// SubstantiveTextsNormalizer normalizes the license text by substituting some words with
+// equivalents that don't change the meaning of the license.
+func SubstantiveTextsNormalizer(text string) string {
+	for _, s := range substitutableTexts {
+		text = s.regex.ReplaceAllString(text, s.replacement)
+	}
+	return text
+}
+
+// CommentIndicatorNormalizer trims the leading characters of comments, such as /*, <!--, --, (*, etc.
+func CommentIndicatorNormalizer(text string) string {
+	for _, leadingChars := range commentIndicators {
+		text = leadingChars.ReplaceAllString(text, "")
+	}
+	return text
+}
+
+// FlattenSpaceNormalizer flattens continuous spaces into a single space.
+func FlattenSpaceNormalizer(text string) string {
+	return flattenSpace.ReplaceAllString(text, " ")
+}
diff --git a/license-eye/pkg/license/norm_test.go b/license-eye/pkg/license/norm_test.go
new file mode 100644
index 0000000..fd9fbc5
--- /dev/null
+++ b/license-eye/pkg/license/norm_test.go
@@ -0,0 +1,260 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import "testing"
+
+type input struct {
+	name string
+	text string
+	want string
+}
+
+func TestCommentLeadingCharsNormalizer(t *testing.T) {
+	want := ` Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+`
+	tests := []input{
+		{
+			name: "Jave",
+			want: want,
+			text: `
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+`,
+		},
+		{
+			name: "Python",
+			want: want,
+			text: `
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+`,
+		},
+		{
+			name: "XML",
+			want: want,
+			text: `
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+`,
+		},
+		{
+			name: "GoLang",
+			want: want,
+			text: `
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+`,
+		},
+		{
+			name: "SQL",
+			want: want,
+			text: `
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--    http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+`,
+		},
+		{
+			name: "BAT1",
+			want: want,
+			text: `
+:: Licensed to the Apache Software Foundation (ASF) under one or more
+:: contributor license agreements.  See the NOTICE file distributed with
+:: this work for additional information regarding copyright ownership.
+:: The ASF licenses this file to You under the Apache License, Version 2.0
+:: (the "License"); you may not use this file except in compliance with
+:: the License.  You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+`,
+		},
+		{
+			name: "BAT2",
+			want: want,
+			text: `
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM    http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM
+`,
+		},
+		{
+			name: "PythonTripleQuotes",
+			text: `
+"""
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+`,
+			want: `
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := CommentIndicatorNormalizer(tt.text); got != tt.want {
+				t.Errorf("%v %v", len(got), len(tt.want))
+				t.Errorf("CommentIndicatorNormalizer() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestSubstantiveTextsNormalizer(t *testing.T) {
+	tests := []input{
+		{
+			name: "ASF",
+			text: "Licensed to the Apache Software Foundation (ASF) under one or more",
+			want: "Licensed to the ASF under one or more",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := SubstantiveTextsNormalizer(tt.text); got != tt.want {
+				t.Errorf("SubstantiveTextsNormalizer() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/license-eye/pkg/header/result.go b/license-eye/pkg/result.go
similarity index 99%
rename from license-eye/pkg/header/result.go
rename to license-eye/pkg/result.go
index 068fe75..9333671 100644
--- a/license-eye/pkg/header/result.go
+++ b/license-eye/pkg/result.go
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 //
-package header
+package pkg
 
 import (
 	"fmt"