You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwhisk.apache.org by ho...@apache.org on 2017/06/23 16:03:32 UTC

[incubator-openwhisk-utilities] branch master updated: Add support for regex scans and lax on Golang tabs. (#16)

This is an automated email from the ASF dual-hosted git repository.

houshengbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk-utilities.git


The following commit(s) were added to refs/heads/master by this push:
     new 0841bdf  Add support for regex scans and lax on Golang tabs. (#16)
0841bdf is described below

commit 0841bdfacc4f771118b9dacae80b1c6ecb037159
Author: Matt Rutkowski <mr...@us.ibm.com>
AuthorDate: Fri Jun 23 11:03:29 2017 -0500

    Add support for regex scans and lax on Golang tabs. (#16)
    
    * Add support for regex scans and lax on Golang tabs.
    
    * Add support for regex scans and lax on Golang tabs.
    
    * Add support for regex scans and lax on Golang tabs.
    
    * Add support for regex scans and lax on Golang tabs.
---
 scancode/README.md    | 21 +++++++++++++--------
 scancode/scanCode.cfg | 15 ++++++++++++++-
 scancode/scanCode.py  | 47 +++++++++++++++++++++++++++++++++++++++++++----
 scancode/travis.cfg   |  9 ++++++++-
 4 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/scancode/README.md b/scancode/README.md
index e7818e7..dd1aeee 100644
--- a/scancode/README.md
+++ b/scancode/README.md
@@ -13,17 +13,17 @@ provided configuration.
 ### positional arguments:
  * root_directory   : starting directory for the scan
 
-### optional arguments:  
- * -h, --help       : show this help message and exit  
- * -v, --verbose    : enable verbose output  
- * --config CONFIG  : provide custom configuration file  
- 
+### optional arguments:
+ * -h, --help       : show this help message and exit
+ * -v, --verbose    : enable verbose output
+ * --config CONFIG  : provide custom configuration file
+
  # Configuration file format
- 
+
  ## Supported sections
- 
+
  ### [Licenses]
- 
+
 List of filenames containing the text of valid license (headers).
 These files SHOULD be in the same directory path where scanCode.py
 resides.
@@ -39,11 +39,16 @@ These include:
 - no_tabs
 - no_trailing_spaces
 - eol_at_eof
+- regex_check
 
 ### [Excludes]
 
 List of paths (inclusive of subdirectories) to exclude from code scanning.
 
+### [Regex]
+
+List of regular expressions for forbidden strings, e.g. \w+@company.com
+
 ### [Options]
 
 List of additional key-value pair format options.
diff --git a/scancode/scanCode.cfg b/scancode/scanCode.cfg
index 3e5387f..97edd81 100644
--- a/scancode/scanCode.cfg
+++ b/scancode/scanCode.cfg
@@ -20,7 +20,7 @@ ApacheIBMLicenseHeader.txt
 *.js=no_tabs, no_trailing_spaces, eol_at_eof
 *.gradle=no_tabs, no_trailing_spaces, eol_at_eof
 *.md=no_tabs, eol_at_eof
-*.go=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
+*.go=has_block_license, no_trailing_spaces, eol_at_eof
 *.lua=has_block_license
 build.xml=no_tabs, no_trailing_spaces, eol_at_eof
 deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
@@ -30,12 +30,20 @@ deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
 # General exclusions
 .tox
 .git
+.bin
+
 # OpenWhisk exclusions
 bin
+tests
 tests/build/reports
 tests/dat
+
 # openwhisk-catalog exclusions (Python samples)
 packages/samples/hello
+
+# CLI, autogenerated Go files
+wski18n
+
 # The following repos. have so far been identified as having scanning errors
 # and will be excluded until their owners have completed updates to add Travis
 # support, run scancode and become compliant.
@@ -48,6 +56,8 @@ openwhisk-devtools
 openwhisk-package-jira
 openwhisk-sample-matos
 openwhisk-GitHubSlackBot
+openwhisk-tutorial
+openwhisk-apigateway
 
 [Options]
 # Not all code files allow licenses to appear starting at the first character
@@ -55,3 +65,6 @@ openwhisk-GitHubSlackBot
 # within first 'x' characters of each code file (as provided by this option's
 # value).
 LICENSE_SLACK_LENGTH=500
+
+# List of regular expressions for forbidden strings, e.g. \w+@company.com
+[Regex]
diff --git a/scancode/scanCode.py b/scancode/scanCode.py
index 4eadf4d..b534a02 100755
--- a/scancode/scanCode.py
+++ b/scancode/scanCode.py
@@ -7,6 +7,7 @@
    - no trailing whitespace
    - files end with EOL
    - valid license headers in source files (where applicable)
+   - general regex. string search
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -49,22 +50,25 @@ RED = '\033[91m'
 YELLOW = '\033[33m'
 
 # Translatable messages (error and general)
+ERR_REGEX = "file contains a forbidden string. string=[%s], regex=[%s]"
 ERR_GENERAL = "an unspecified error was detected."
 ERR_INVALID_CONFIG_FILE = "Invalid configuration file [%s]: %s.\n"
+ERR_INVALID_SCAN_FUNCTION = "Config. file filter [%s] lists invalid " \
+                            "function [%s]."
 ERR_LICENSE = "file does not include required license header."
+ERR_LICENSE_FILE_NOT_FOUND = "License file [%s] could not be found."
 ERR_NO_EOL_AT_EOF = "file does not end with EOL."
 ERR_PATH_IS_NOT_DIRECTORY = "%s: [%s] is not a valid directory.\n"
 ERR_REQUIRED_SECTION = "Configuration file missing required section: [%s]"
 ERR_SYMBOLIC_LINK = "file is a symbolic link."
 ERR_TABS = "line contains tabs."
 ERR_TRAILING_WHITESPACE = "line has trailing whitespaces."
-ERR_LICENSE_FILE_NOT_FOUND = "License file [%s] could not be found."
-ERR_INVALID_SCAN_FUNCTION = "Config. file filter [%s] lists invalid " \
-                            "function [%s]."
+
 HELP_CONFIG_FILE = "provide custom configuration file"
 HELP_DISPLAY_EXCLUSIONS = "display path exclusion information"
 HELP_ROOT_DIR = "starting directory for the scan"
 HELP_VERBOSE = "enable verbose output"
+
 MSG_CHECKING_FILE = "  [%s]..."
 MSG_CHECKS_PASSED = "All checks passed."
 MSG_CONFIG_ADDING_LICENSE_FILE = "Adding valid license from: [%s], value:\n%s"
@@ -75,11 +79,13 @@ MSG_RUNNING_FILE_CHECKS = "    Running File Check [%s]"
 MSG_RUNNING_LINE_CHECKS = "    Running Line Check [%s]"
 MSG_SCANNING_FILTER = "Scanning files with filter: [%s]:"
 MSG_SCANNING_STARTED = "Scanning files starting at [%s]..."
+
 WARN_CONFIG_SECTION_NOT_FOUND = "Configuration file section [%s] not found."
 WARN_SCAN_EXCLUDED_PATH_SUMMARY = "Scan excluded (%s) directories:"
 WARN_SCAN_EXCLUDED_FILE_SUMMARY = "Scan excluded (%s) files:"
 WARN_SCAN_EXCLUDED_FILE = "  Excluded file: %s"
 WARN_SCAN_EXCLUDED_PATH = "  Excluded path: %s"
+
 MSG_DESCRIPTION = "Scans all source code under specified directory for " \
                   "project compliance using provided configuration."
 
@@ -94,6 +100,7 @@ SECTION_LICENSE = "Licenses"
 SECTION_EXCLUDE = "Excludes"
 SECTION_INCLUDE = "Includes"
 SECTION_OPTIONS = "Options"
+SECTION_REGEX = "Regex"
 
 # Configuration Options known keys
 OPT_LICENSE_SLACK_LEN = "license_slack_length"
@@ -101,7 +108,14 @@ OPT_LICENSE_SLACK_LEN = "license_slack_length"
 # Globals
 """Hold valid license headers within an array strings."""
 valid_licenses = []
+
+"""Paths to exclude from directory search."""
 exclusion_paths = []
+
+"""Regex. patterns to search for."""
+regex_patterns = []
+
+"""globals."""
 exclusion_files_set = set()
 license_search_slack_len = DEFAULT_LICENSE_SEARCH_SLACK
 FILE_CHECK_FUNCTIONS = dict()
@@ -224,6 +238,19 @@ def read_scan_options(config):
         raise Exception(ERR_REQUIRED_SECTION % SECTION_OPTIONS)
 
 
+def read_regex(config):
+    """Load the forbidden-string patterns from the [Regex] config section.
+
+    Every key found in that section is treated as one regular expression
+    and appended to the module-level regex_patterns list.
+    """
+    section = get_config_section_dict(config, SECTION_REGEX)
+    if section is None:
+        raise Exception(ERR_REQUIRED_SECTION % SECTION_REGEX)
+    # Keys (not values) carry the patterns.
+    regex_patterns.extend(key for key in section if key is not None)
+
+
 def read_config_file(file):
     """Read in and validate configuration file."""
     try:
@@ -238,6 +265,7 @@ def read_config_file(file):
         read_path_inclusions(config)
         read_path_exclusions(config)
         read_scan_options(config)
+        read_regex(config)
     except Exception as e:
         print_error(e)
         return -1
@@ -300,6 +328,16 @@ def is_not_symlink(path):
         return None
 
 
+def regex_check(line):
+    """Return ERR_REGEX if line matches any forbidden regex, else None."""
+    # Every pattern must be tried: an early "return None" on the first
+    # non-matching pattern would leave the remaining patterns unchecked.
+    for pattern in regex_patterns:
+        if re.search(pattern, line):
+            return ERR_REGEX
+    return None
+
+
 # Note: this function must appear after all "check" functions are defined
 def read_path_inclusions(config):
     """Read the list of paths to include in scan tests."""
@@ -469,7 +507,8 @@ if __name__ == "__main__":
     LINE_CHECK_FUNCTIONS.update({
         "no_tabs": no_tabs,
         "no_trailing_spaces": no_trailing_spaces,
-        "eol_at_eof": eol_at_eof
+        "eol_at_eof": eol_at_eof,
+        "regex_check": regex_check
     })
 
     # Read / load configuration file from file (pointer)
diff --git a/scancode/travis.cfg b/scancode/travis.cfg
index c50e2ac..ee9e988 100644
--- a/scancode/travis.cfg
+++ b/scancode/travis.cfg
@@ -20,7 +20,7 @@ ApacheIBMLicenseHeader.txt
 *.js=no_tabs, no_trailing_spaces, eol_at_eof
 *.gradle=no_tabs, no_trailing_spaces, eol_at_eof
 *.md=no_tabs, eol_at_eof
-*.go=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
+*.go=has_block_license, no_trailing_spaces, eol_at_eof
 *.lua=has_block_license
 build.xml=no_tabs, no_trailing_spaces, eol_at_eof
 deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
@@ -30,6 +30,9 @@ deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
 # General exclusions
 .tox
 .git
+.bin
+
+# Unit tests
 tests/bad
 tests/mixed
 tests/MixedCase
@@ -41,3 +44,7 @@ tests/exclude
 # within first 'x' characters of each code file (as provided by this option's
 # value).
 LICENSE_SLACK_LENGTH=500
+
+# List of regular expressions for forbidden strings, e.g. \w+@company.com
+[Regex]
+\w+@company.com

-- 
To stop receiving notification emails like this one, please contact
['"commits@openwhisk.apache.org" <co...@openwhisk.apache.org>'].