You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@skywalking.apache.org by ke...@apache.org on 2020/12/23 05:08:59 UTC

[skywalking-eyes] 01/01: Refactor the normalizers to increase accuracy and minimise the use of pattern

This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch refactor
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit e194b68d89bea1b7722833e270ad90e35980cd0c
Author: kezhenxu94 <ke...@apache.org>
AuthorDate: Wed Dec 23 13:08:41 2020 +0800

    Refactor the normalizers to increase accuracy and minimise the use of pattern
---
 .golangci.yml                          |  37 ++++-
 .licenserc.yaml                        |   4 +-
 license-eye/commands/header/check.go   |   3 +-
 license-eye/commands/header/fix.go     |   3 +-
 license-eye/internal/logger/log.go     |   2 +-
 license-eye/pkg/header/check.go        |  40 ++---
 license-eye/pkg/header/check_test.go   |   9 +-
 license-eye/pkg/header/config.go       |  50 ++-----
 license-eye/pkg/header/fix.go          |   7 +-
 license-eye/pkg/license/norm.go        | 171 ++++++++++++++++++++++
 license-eye/pkg/license/norm_test.go   | 260 +++++++++++++++++++++++++++++++++
 license-eye/pkg/{header => }/result.go |   2 +-
 12 files changed, 506 insertions(+), 82 deletions(-)

diff --git a/.golangci.yml b/.golangci.yml
index f4a7795..f4a6fee 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -16,6 +16,9 @@
 # under the License.
 # 
 
+run:
+  tests: false
+
 linters-settings:
   govet:
     check-shadowing: true
@@ -37,6 +40,38 @@ linters-settings:
       fmt: "logging is allowed only by logutils.Log"
   misspell:
     locale: US
+    ignore-words:
+      - analogue
+      - analyse
+      - artefact
+      - authorised
+      - calibre
+      - cancelled
+      - catalogue
+      - categorise
+      - centre
+      - emphasised
+      - favour
+      - favourite
+      - fulfil
+      - fulfilment
+      - initialise
+      - labelling
+      - labour
+      - licence
+      - maximise
+      - modelled
+      - modelling
+      - offence
+      - optimise
+      - organisation
+      - organise
+      - practise
+      - programme
+      - realise
+      - recognise
+      - signalling
+      - utilisation
   lll:
     line-length: 150
   goimports:
@@ -51,7 +86,7 @@ linters-settings:
     disabled-checks:
       - ifElseChain
   funlen:
-    lines: 150
+    lines: 100
     statements: 50
   whitespace:
     multi-if: false
diff --git a/.licenserc.yaml b/.licenserc.yaml
index e4494c7..819f2b1 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -38,10 +38,10 @@ header: # `header` section is configurations for source codes license header.
   # `pattern` is optional regexp if all the file headers are the same as `license` (linebreaks doesn't matter);
   # In the `pattern`, all punctuations should be removed unless they are part of the regex;
   pattern: |
-    Licensed to( the)? Apache Software Foundation \(ASF\) under one or more contributor
+    Licensed to the Apache Software Foundation under one or more contributor
     license agreements. See the NOTICE file distributed with
     this work for additional information regarding copyright
-    ownership. (Apache Software Foundation \(ASF\)|The ASF) licenses this file to you under
+    ownership. The Apache Software Foundation licenses this file to you under
     the Apache License, Version 2.0 \(the "License"\); you may
     not use this file except in compliance with the License.
     You may obtain a copy of the License at
diff --git a/license-eye/commands/header/check.go b/license-eye/commands/header/check.go
index c349ace..0578286 100644
--- a/license-eye/commands/header/check.go
+++ b/license-eye/commands/header/check.go
@@ -19,6 +19,7 @@ package header
 
 import (
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/config"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/header"
 
@@ -31,7 +32,7 @@ var CheckCommand = &cobra.Command{
 	Long:    "check command walks the specified paths recursively and checks if the specified files have the license header in the config file.",
 	RunE: func(cmd *cobra.Command, args []string) error {
 		var config config.Config
-		var result header.Result
+		var result pkg.Result
 
 		if err := config.Parse(cfgFile); err != nil {
 			return err
diff --git a/license-eye/commands/header/fix.go b/license-eye/commands/header/fix.go
index b65069f..4fbba48 100644
--- a/license-eye/commands/header/fix.go
+++ b/license-eye/commands/header/fix.go
@@ -23,6 +23,7 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/config"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/header"
 	"github.com/spf13/cobra"
@@ -34,7 +35,7 @@ var FixCommand = &cobra.Command{
 	Long:    "fix command walks the specified paths recursively and fix the license header if the specified files don't have the license header.",
 	RunE: func(cmd *cobra.Command, args []string) error {
 		var config config.Config
-		var result header.Result
+		var result pkg.Result
 
 		if err := config.Parse(cfgFile); err != nil {
 			return err
diff --git a/license-eye/internal/logger/log.go b/license-eye/internal/logger/log.go
index 1c641e8..7fc102e 100644
--- a/license-eye/internal/logger/log.go
+++ b/license-eye/internal/logger/log.go
@@ -29,7 +29,7 @@ func init() {
 	if Log == nil {
 		Log = logrus.New()
 	}
-	Log.Level = logrus.InfoLevel
+	Log.Level = logrus.DebugLevel
 	Log.SetOutput(os.Stdout)
 	Log.SetFormatter(&logrus.TextFormatter{
 		DisableTimestamp:       true,
diff --git a/license-eye/pkg/header/check.go b/license-eye/pkg/header/check.go
index 8053cd3..c32279b 100644
--- a/license-eye/pkg/header/check.go
+++ b/license-eye/pkg/header/check.go
@@ -18,7 +18,6 @@
 package header
 
 import (
-	"bufio"
 	"io/ioutil"
 	"net/http"
 	"os"
@@ -27,20 +26,20 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
+	lcs "github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
 	"github.com/bmatcuk/doublestar/v2"
 )
 
-// TODO: also trim stop words
 var (
 	// LicenseLocationThreshold specifies the index threshold where the license header can be located,
 	// after all, a "header" cannot be TOO far from the file start.
 	LicenseLocationThreshold = 80
-	Punctuations             = regexp.MustCompile("[\\[\\]/*:;\\s#\\-!~'\"(){}?]+")
 )
 
 // Check checks the license headers of the specified paths/globs.
-func Check(config *ConfigHeader, result *Result) error {
+func Check(config *ConfigHeader, result *pkg.Result) error {
 	for _, pattern := range config.Paths {
 		if err := checkPattern(pattern, result, config); err != nil {
 			return err
@@ -52,7 +51,7 @@ func Check(config *ConfigHeader, result *Result) error {
 
 var seen = make(map[string]bool)
 
-func checkPattern(pattern string, result *Result, config *ConfigHeader) error {
+func checkPattern(pattern string, result *pkg.Result, config *ConfigHeader) error {
 	paths, err := doublestar.Glob(pattern)
 
 	if err != nil {
@@ -73,7 +72,7 @@ func checkPattern(pattern string, result *Result, config *ConfigHeader) error {
 	return nil
 }
 
-func checkPath(path string, result *Result, config *ConfigHeader) error {
+func checkPath(path string, result *pkg.Result, config *ConfigHeader) error {
 	defer func() { seen[path] = true }()
 
 	if yes, err := config.ShouldIgnore(path); yes || seen[path] || err != nil {
@@ -106,7 +105,7 @@ func checkPath(path string, result *Result, config *ConfigHeader) error {
 }
 
 // CheckFile checks whether or not the file contains the configured license header.
-func CheckFile(file string, config *ConfigHeader, result *Result) error {
+func CheckFile(file string, config *ConfigHeader, result *pkg.Result) error {
 	if yes, err := config.ShouldIgnore(file); yes || err != nil {
 		if !seen[file] {
 			result.Ignore(file)
@@ -116,14 +115,6 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 
 	logger.Log.Debugln("Checking file:", file)
 
-	reader, err := os.Open(file)
-
-	if err != nil {
-		return err
-	}
-
-	var lines []string
-
 	bs, err := ioutil.ReadFile(file)
 	if err != nil {
 		return err
@@ -133,24 +124,13 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 		return nil
 	}
 
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		line := strings.ToLower(Punctuations.ReplaceAllString(scanner.Text(), " "))
-		if len(line) > 0 {
-			lines = append(lines, line)
-		}
-	}
+	content := lcs.NormalizeHeader(string(bs))
+	expected, pattern := config.NormalizedLicense(), config.NormalizedPattern()
 
-	content := Punctuations.ReplaceAllString(strings.Join(lines, " "), " ")
-	license, pattern := config.NormalizedLicense(), config.NormalizedPattern()
-
-	if satisfy(content, license, pattern) {
+	if satisfy(content, expected, pattern) {
 		result.Succeed(file)
 	} else {
 		logger.Log.Debugln("Content is:", content)
-		if pattern != nil {
-			logger.Log.Debugln("Pattern is:", pattern)
-		}
 
 		result.Fail(file)
 	}
@@ -159,7 +139,7 @@ func CheckFile(file string, config *ConfigHeader, result *Result) error {
 }
 
 func satisfy(content, license string, pattern *regexp.Regexp) bool {
-	if index := strings.Index(content, license); index >= 0 {
+	if index := strings.Index(content, license); license != "" && index >= 0 {
 		return index < LicenseLocationThreshold
 	}
 
diff --git a/license-eye/pkg/header/check_test.go b/license-eye/pkg/header/check_test.go
index 2cc2de7..ca8e1bb 100644
--- a/license-eye/pkg/header/check_test.go
+++ b/license-eye/pkg/header/check_test.go
@@ -24,6 +24,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"gopkg.in/yaml.v3"
 )
 
@@ -48,7 +49,7 @@ func TestCheckFile(t *testing.T) {
 	type args struct {
 		name       string
 		file       string
-		result     *Result
+		result     *pkg.Result
 		wantErr    bool
 		hasFailure bool
 	}
@@ -64,7 +65,7 @@ func TestCheckFile(t *testing.T) {
 			cases = append(cases, args{
 				name:       file,
 				file:       file,
-				result:     &Result{},
+				result:     &pkg.Result{},
 				wantErr:    false,
 				hasFailure: false,
 			})
@@ -99,7 +100,7 @@ func TestCheckFileFailure(t *testing.T) {
 	type args struct {
 		name       string
 		file       string
-		result     *Result
+		result     *pkg.Result
 		wantErr    bool
 		hasFailure bool
 	}
@@ -115,7 +116,7 @@ func TestCheckFileFailure(t *testing.T) {
 			cases = append(cases, args{
 				name:       file,
 				file:       file,
-				result:     &Result{},
+				result:     &pkg.Result{},
 				wantErr:    false,
 				hasFailure: true,
 			})
diff --git a/license-eye/pkg/header/config.go b/license-eye/pkg/header/config.go
index 8911cfc..d303dd4 100644
--- a/license-eye/pkg/header/config.go
+++ b/license-eye/pkg/header/config.go
@@ -19,15 +19,14 @@ package header
 
 import (
 	"bufio"
-	"io/ioutil"
 	"os"
 	"regexp"
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
 	"github.com/bmatcuk/doublestar/v2"
-	"gopkg.in/yaml.v3"
 )
 
 type ConfigHeader struct {
@@ -40,47 +39,19 @@ type ConfigHeader struct {
 // NormalizedLicense returns the normalized string of the license content,
 // "normalized" means the linebreaks and Punctuations are all trimmed.
 func (config *ConfigHeader) NormalizedLicense() string {
-	var lines []string
-	for _, line := range strings.Split(config.License, "\n") {
-		if len(line) > 0 {
-			lines = append(lines, Punctuations.ReplaceAllString(line, " "))
-		}
-	}
-	return strings.ToLower(regexp.MustCompile("(?m)[\\s\"']+").ReplaceAllString(strings.Join(lines, " "), " "))
+	return license.Normalize(config.License)
 }
 
 func (config *ConfigHeader) NormalizedPattern() *regexp.Regexp {
-	if config.Pattern == "" || strings.TrimSpace(config.Pattern) == "" {
-		return nil
-	}
+	pattern := config.Pattern
 
-	var lines []string
-	for _, line := range strings.Split(config.Pattern, "\n") {
-		if len(line) > 0 {
-			lines = append(lines, line)
-		}
-	}
-	content := regexp.MustCompile("(?m)[\\s\"':;/\\-]+").ReplaceAllString(strings.Join(lines, " "), " ")
-	return regexp.MustCompile("(?i).*" + content + ".*")
-}
-
-// Parse reads and parses the header check configurations in config file.
-func (config *ConfigHeader) Parse(file string) error {
-	logger.Log.Infoln("Loading configuration from file:", file)
-
-	if bytes, err := ioutil.ReadFile(file); err != nil {
-		return err
-	} else if err := yaml.Unmarshal(bytes, config); err != nil {
-		return err
+	if pattern == "" || strings.TrimSpace(pattern) == "" {
+		return nil
 	}
 
-	logger.Log.Debugln("License header is:", config.NormalizedLicense())
+	pattern = license.NormalizePattern(pattern)
 
-	if len(config.Paths) == 0 {
-		config.Paths = []string{"**"}
-	}
-
-	return nil
+	return regexp.MustCompile("(?i).*" + pattern + ".*")
 }
 
 func (config *ConfigHeader) ShouldIgnore(path string) (bool, error) {
@@ -103,8 +74,6 @@ func (config *ConfigHeader) ShouldIgnore(path string) (bool, error) {
 }
 
 func (config *ConfigHeader) Finalize() error {
-	logger.Log.Debugln("License header is:", config.NormalizedLicense())
-
 	if len(config.Paths) == 0 {
 		config.Paths = []string{"**"}
 	}
@@ -124,5 +93,10 @@ func (config *ConfigHeader) Finalize() error {
 		}
 	}
 
+	logger.Log.Debugln("License header is:", config.NormalizedLicense())
+	if p := config.NormalizedPattern(); p != nil {
+		logger.Log.Debugln("Pattern is:", p)
+	}
+
 	return nil
 }
diff --git a/license-eye/pkg/header/fix.go b/license-eye/pkg/header/fix.go
index 40acde7..203de06 100644
--- a/license-eye/pkg/header/fix.go
+++ b/license-eye/pkg/header/fix.go
@@ -26,12 +26,13 @@ import (
 	"strings"
 
 	"github.com/apache/skywalking-eyes/license-eye/internal/logger"
+	"github.com/apache/skywalking-eyes/license-eye/pkg"
 	"github.com/apache/skywalking-eyes/license-eye/pkg/comments"
 )
 
 // Fix adds the configured license header to the given file.
-func Fix(file string, config *ConfigHeader, result *Result) error {
-	var r Result
+func Fix(file string, config *ConfigHeader, result *pkg.Result) error {
+	var r pkg.Result
 	if err := CheckFile(file, config, &r); err != nil || !r.HasFailure() {
 		logger.Log.Warnln("Try to fix a valid file, do nothing:", file)
 		return err
@@ -50,7 +51,7 @@ func Fix(file string, config *ConfigHeader, result *Result) error {
 	return nil
 }
 
-func InsertComment(file string, style *comments.CommentStyle, config *ConfigHeader, result *Result) error {
+func InsertComment(file string, style *comments.CommentStyle, config *ConfigHeader, result *pkg.Result) error {
 	stat, err := os.Stat(file)
 	if err != nil {
 		return err
diff --git a/license-eye/pkg/license/norm.go b/license-eye/pkg/license/norm.go
new file mode 100644
index 0000000..771f2b2
--- /dev/null
+++ b/license-eye/pkg/license/norm.go
@@ -0,0 +1,171 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import (
+	"regexp"
+	"strings"
+)
+
+type Normalizer func(string) string
+
+var (
+	// normalizers is a list of Normalizers that can be applied to the license text without changing the license's
+	// meaning, according to the matching guidelines at https://spdx.dev/license-list/matching-guidelines.
+	// The order matters.
+	normalizers = []Normalizer{
+		OneLineNormalizer,
+		FlattenSpaceNormalizer,
+		SubstantiveTextsNormalizer,
+		strings.ToLower,
+		strings.TrimSpace,
+	}
+
+	// 6. Code Comment Indicators (https://spdx.dev/license-list/matching-guidelines).
+	commentIndicators = []*regexp.Regexp{
+		regexp.MustCompile(`(?m)^\s*#+`),    // #
+		regexp.MustCompile(`(?m)^\s*//+`),   // //
+		regexp.MustCompile(`(?m)^\s*"""+`),  // """
+		regexp.MustCompile(`(?m)^\s*\(\*+`), // (*
+
+		regexp.MustCompile(`(?m)^\s*/\*+`), // /*
+		regexp.MustCompile(`(?m)^\s*\*+/`), //  */
+		regexp.MustCompile(`(?m)^\s*\*+`),  //  *
+
+		regexp.MustCompile(`(?m)^\s*<!--+`), // <!--
+		regexp.MustCompile(`(?m)^\s*--+>`),  // -->
+		regexp.MustCompile(`(?m)^\s*--+`),   // --
+		regexp.MustCompile(`(?m)^\s*~+`),    //   ~
+
+		regexp.MustCompile(`(?m)^\s*{-+`), // {-
+		regexp.MustCompile(`(?m)^\s*-}+`), // -}
+
+		regexp.MustCompile(`(?m)^\s*::`),   // ::
+		regexp.MustCompile(`(?m)^\s*@REM`), // @REM
+	}
+
+	flattenSpace = regexp.MustCompile(`\s+`)
+
+	substitutableTexts = []struct {
+		regex       *regexp.Regexp
+		replacement string
+	}{
+		{regexp.MustCompile(`(?i)\backnowledgement\b`), "acknowledgment"},
+		{regexp.MustCompile(`(?i)\banalog\b`), "analogue"},
+		{regexp.MustCompile(`(?i)\banalyze\b`), "analyse"},
+		{regexp.MustCompile(`(?i)\bartifact\b`), "artefact"},
+		{regexp.MustCompile(`(?i)\bauthorization\b`), "authorisation"},
+		{regexp.MustCompile(`(?i)\bauthorized\b`), "authorised"},
+		{regexp.MustCompile(`(?i)\bcaliber\b`), "calibre"},
+		{regexp.MustCompile(`(?i)\bcanceled\b`), "cancelled"},
+		{regexp.MustCompile(`(?i)\bcapitalizations\b`), "capitalisations"},
+		{regexp.MustCompile(`(?i)\bcatalog\b`), "catalogue"},
+		{regexp.MustCompile(`(?i)\bcategorize\b`), "categorise"},
+		{regexp.MustCompile(`(?i)\bcenter\b`), "centre"},
+		{regexp.MustCompile(`(?i)\bcopyright holder\b`), "copyright owner"},
+		{regexp.MustCompile(`(?i)\bemphasized\b`), "emphasised"},
+		{regexp.MustCompile(`(?i)\bfavor\b`), "favour"},
+		{regexp.MustCompile(`(?i)\bfavorite\b`), "favourite"},
+		{regexp.MustCompile(`(?i)\bfulfill\b`), "fulfil"},
+		{regexp.MustCompile(`(?i)\bfulfillment\b`), "fulfilment"},
+		{regexp.MustCompile(`(?i)\binitialize\b`), "initialise"},
+		{regexp.MustCompile(`(?i)\bjudgement\b`), "judgment"},
+		{regexp.MustCompile(`(?i)\blabeling\b`), "labelling"},
+		{regexp.MustCompile(`(?i)\blabor\b`), "labour"},
+		{regexp.MustCompile(`(?i)\blicense\b`), "licence"},
+		{regexp.MustCompile(`(?i)\bmaximize\b`), "maximise"},
+		{regexp.MustCompile(`(?i)\bmodeled\b`), "modelled"},
+		{regexp.MustCompile(`(?i)\bmodeling\b`), "modelling"},
+		{regexp.MustCompile(`(?i)\bnoncommercial\b`), "non-commercial"},
+		{regexp.MustCompile(`(?i)\boffense\b`), "offence"},
+		{regexp.MustCompile(`(?i)\boptimize\b`), "optimise"},
+		{regexp.MustCompile(`(?i)\borganization\b`), "organisation"},
+		{regexp.MustCompile(`(?i)\borganize\b`), "organise"},
+		{regexp.MustCompile(`(?i)\bpercent\b`), "per cent"},
+		{regexp.MustCompile(`(?i)\bpractice\b`), "practise"},
+		{regexp.MustCompile(`(?i)\bprogram\b`), "programme"},
+		{regexp.MustCompile(`(?i)\brealize\b`), "realise"},
+		{regexp.MustCompile(`(?i)\brecognize\b`), "recognise"},
+		{regexp.MustCompile(`(?i)\bsignaling\b`), "signalling"},
+		{regexp.MustCompile(`(?i)\bsublicense\b`), "sub-license"},
+		{regexp.MustCompile(`(?i)\bsub-license\b`), "sub license"},
+		{regexp.MustCompile(`(?i)\bsublicense\b`), "sub license"},
+		{regexp.MustCompile(`(?i)\butilization\b`), "utilisation"},
+		{regexp.MustCompile(`(?i)\bwhile\b`), "whilst"},
+		{regexp.MustCompile(`(?i)\bwilfull\b`), "wilful"},
+
+		{regexp.MustCompile(`©`), "Copyright "},
+		{regexp.MustCompile(`\(c\)`), "Copyright "},
+		{regexp.MustCompile(`\bhttps://`), "http://"},
+
+		{regexp.MustCompile(`(?i)\b(the )?Apache Software Foundation( \(ASF\))?`), "the ASF"},
+	}
+)
+
+// NormalizePattern applies a chain of Normalizers to the license pattern to make it cleaner for identification.
+func NormalizePattern(pattern string) string {
+	for _, normalize := range normalizers {
+		pattern = normalize(pattern)
+	}
+	return pattern
+}
+
+// NormalizeHeader applies a chain of Normalizers to the file header to make it cleaner for identification.
+func NormalizeHeader(header string) string {
+	ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...)
+	for _, normalize := range ns {
+		header = normalize(header)
+	}
+	return header
+}
+
+// Normalize applies a chain of Normalizers to the license text to make it cleaner for identification.
+func Normalize(license string) string {
+	for _, normalize := range normalizers {
+		license = normalize(license)
+	}
+	return license
+}
+
+// OneLineNormalizer simply removes all line breaks to flatten the license text into one line.
+func OneLineNormalizer(text string) string {
+	return regexp.MustCompile("[\n\r]+").ReplaceAllString(text, " ")
+}
+
+// SubstantiveTextsNormalizer normalizes the license text by substituting some words with
+// equivalent variants that don't change the meaning of the license.
+func SubstantiveTextsNormalizer(text string) string {
+	for _, s := range substitutableTexts {
+		text = s.regex.ReplaceAllString(text, s.replacement)
+	}
+	return text
+}
+
+// CommentIndicatorNormalizer trims the leading characters of comments, such as /*, <!--, --, (*, etc.
+func CommentIndicatorNormalizer(text string) string {
+	for _, leadingChars := range commentIndicators {
+		text = leadingChars.ReplaceAllString(text, "")
+	}
+	return text
+}
+
+// FlattenSpaceNormalizer flattens continuous spaces into a single space.
+func FlattenSpaceNormalizer(text string) string {
+	return flattenSpace.ReplaceAllString(text, " ")
+}
diff --git a/license-eye/pkg/license/norm_test.go b/license-eye/pkg/license/norm_test.go
new file mode 100644
index 0000000..fd9fbc5
--- /dev/null
+++ b/license-eye/pkg/license/norm_test.go
@@ -0,0 +1,260 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import "testing"
+
+type input struct {
+	name string
+	text string
+	want string
+}
+
+func TestCommentLeadingCharsNormalizer(t *testing.T) {
+	want := ` Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+`
+	tests := []input{
+		{
+			name: "Jave",
+			want: want,
+			text: `
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+`,
+		},
+		{
+			name: "Python",
+			want: want,
+			text: `
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+`,
+		},
+		{
+			name: "XML",
+			want: want,
+			text: `
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+`,
+		},
+		{
+			name: "GoLang",
+			want: want,
+			text: `
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+`,
+		},
+		{
+			name: "SQL",
+			want: want,
+			text: `
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--    http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+`,
+		},
+		{
+			name: "BAT1",
+			want: want,
+			text: `
+:: Licensed to the Apache Software Foundation (ASF) under one or more
+:: contributor license agreements.  See the NOTICE file distributed with
+:: this work for additional information regarding copyright ownership.
+:: The ASF licenses this file to You under the Apache License, Version 2.0
+:: (the "License"); you may not use this file except in compliance with
+:: the License.  You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+`,
+		},
+		{
+			name: "BAT2",
+			want: want,
+			text: `
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM    http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM
+`,
+		},
+		{
+			name: "PythonTripleQuotes",
+			text: `
+"""
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+`,
+			want: `
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := CommentIndicatorNormalizer(tt.text); got != tt.want {
+				t.Errorf("%v %v", len(got), len(tt.want))
+				t.Errorf("CommentIndicatorNormalizer() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestSubstantiveTextsNormalizer(t *testing.T) {
+	tests := []input{
+		{
+			name: "ASF",
+			text: "Licensed to the Apache Software Foundation (ASF) under one or more",
+			want: "Licensed to the ASF under one or more",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := SubstantiveTextsNormalizer(tt.text); got != tt.want {
+				t.Errorf("SubstantiveTextsNormalizer() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/license-eye/pkg/header/result.go b/license-eye/pkg/result.go
similarity index 99%
rename from license-eye/pkg/header/result.go
rename to license-eye/pkg/result.go
index 068fe75..9333671 100644
--- a/license-eye/pkg/header/result.go
+++ b/license-eye/pkg/result.go
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 //
-package header
+package pkg
 
 import (
 	"fmt"