You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwhisk.apache.org by cs...@apache.org on 2019/04/03 02:36:47 UTC
[incubator-openwhisk-utilities] branch master updated: Fix
exclusion directory and file matching and add support for reading a top
level gitignore file (#57)
This is an automated email from the ASF dual-hosted git repository.
csantanapr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk-utilities.git
The following commit(s) were added to refs/heads/master by this push:
new e1faec9 Fix exclusion directory and file matching and add support for reading a top level gitignore file (#57)
e1faec9 is described below
commit e1faec929990f9107238c5287bae2209eca1b1a2
Author: rodric rabbah <ro...@gmail.com>
AuthorDate: Tue Apr 2 22:36:42 2019 -0400
Fix exclusion directory and file matching and add support for reading a top level gitignore file (#57)
---
.gitignore | 3 +
LICENSE.txt | 12 ++
README.md | 4 +-
licenses/LICENSE-pathspec.txt | 374 ++++++++++++++++++++++++++++++++++++++++++
scancode/lib/compat.py | 45 +++++
scancode/lib/gitwildmatch.py | 325 ++++++++++++++++++++++++++++++++++++
scancode/lib/pathspec.py | 146 +++++++++++++++++
scancode/lib/pattern.py | 155 +++++++++++++++++
scancode/lib/util.py | 359 ++++++++++++++++++++++++++++++++++++++++
scancode/scanCode.py | 57 ++++---
scancode/travis.cfg | 3 +
11 files changed, 1455 insertions(+), 28 deletions(-)
diff --git a/.gitignore b/.gitignore
index 23c57c9..fd8ce7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,6 @@ ansible/roles/nginx/files/*cert.pem
# .zip files must be explicitly whitelisted
*.zip
+
+# .pyc files
+*.pyc
diff --git a/LICENSE.txt b/LICENSE.txt
index a2fe52f..23fba0a 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -200,3 +200,15 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+
+========================================================================
+Mozilla Public License 2.0
+========================================================================
+
+This distribution bundles the following component, which is
+available under the Mozilla Public License 2.0
+(https://www.mozilla.org/en-US/MPL/2.0/).
+
+Pathspec 0.5.9 (https://pypi.org/project/pathspec/) under scancode/lib.
+License included at licenses/LICENSE-pathspec.txt.
diff --git a/README.md b/README.md
index 93c7de3..fecac52 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ Scan detected 3 error(s) in 1 file(s):
To make sure this never happens to you, you can run the same tests on your local machine every time you commit changes.
-1. Clone the OpenWhisk utilities project repo.:
+1. Clone the OpenWhisk utilities project:
```bash
$ git clone https://github.com/apache/incubator-openwhisk-utilities.git
```
@@ -66,7 +66,7 @@ $ cat /path/to/openwhisk/.git/hooks/pre-commit
# determine openwhisk base directory
root="$(git rev-parse --show-toplevel)"
-python /path/to/incubator-openwhisk-utilities/scancode/scanCode.py . --config $root/tools/
+python /path/to/incubator-openwhisk-utilities/scancode/scanCode.py . --config $root/tools/ --gitignore $root/.gitignore
```
_Note_: A hook is locally installed, so if you check out the repository again, you will need to reinstall it.
diff --git a/licenses/LICENSE-pathspec.txt b/licenses/LICENSE-pathspec.txt
new file mode 100644
index 0000000..52d1351
--- /dev/null
+++ b/licenses/LICENSE-pathspec.txt
@@ -0,0 +1,374 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
+
diff --git a/scancode/lib/compat.py b/scancode/lib/compat.py
new file mode 100644
index 0000000..540599b
--- /dev/null
+++ b/scancode/lib/compat.py
@@ -0,0 +1,45 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides compatibility between Python 2 and 3. Too little
+is needed by this project to justify including `six`_.
+
+.. _`six`: http://pythonhosted.org/six
+"""
+
+import sys
+
+if sys.version_info[0] < 3:
+ # Python 2.
+ unicode = unicode
+ string_types = (basestring,)
+
+ from itertools import izip_longest
+
+ def iterkeys(mapping):
+ return mapping.iterkeys()
+
+else:
+ # Python 3.
+ unicode = str
+ string_types = (unicode,)
+
+ from itertools import zip_longest as izip_longest
+
+ def iterkeys(mapping):
+ return mapping.keys()
+
+try:
+ # Python 3.6+.
+ from collections.abc import Collection as collection_type
+except ImportError:
+ # Python 2.7 - 3.5.
+ from collections import Container as collection_type
+
diff --git a/scancode/lib/gitwildmatch.py b/scancode/lib/gitwildmatch.py
new file mode 100644
index 0000000..5076bd3
--- /dev/null
+++ b/scancode/lib/gitwildmatch.py
@@ -0,0 +1,325 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module implements Git's wildmatch pattern matching which itself is
+derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
+files.
+"""
+
+from __future__ import unicode_literals
+
+import re
+import warnings
+
+import util
+from compat import unicode
+from pattern import RegexPattern
+
+#: The encoding to use when parsing a byte string pattern.
+_BYTES_ENCODING = 'latin1'
+
+
+class GitWildMatchPattern(RegexPattern):
+ """
+ The :class:`GitWildMatchPattern` class represents a compiled Git
+ wildmatch pattern.
+ """
+
+ # Keep the dict-less class hierarchy.
+ __slots__ = ()
+
+ @classmethod
+ def pattern_to_regex(cls, pattern):
+ """
+ Convert the pattern into a regular expression.
+
+ *pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
+ convert into a regular expression.
+
+ Returns the uncompiled regular expression (:class:`unicode`, :class:`bytes`,
+ or :data:`None`), and whether matched files should be included
+ (:data:`True`), excluded (:data:`False`), or if it is a
+ null-operation (:data:`None`).
+ """
+ if isinstance(pattern, unicode):
+ return_type = unicode
+ elif isinstance(pattern, bytes):
+ return_type = bytes
+ pattern = pattern.decode(_BYTES_ENCODING)
+ else:
+ raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern))
+
+ pattern = pattern.strip()
+
+ if pattern.startswith('#'):
+ # A pattern starting with a hash ('#') serves as a comment
+ # (neither includes nor excludes files). Escape the hash with a
+ # back-slash to match a literal hash (i.e., '\#').
+ regex = None
+ include = None
+
+ elif pattern == '/':
+ # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
+ # '/' does not match any file.
+ regex = None
+ include = None
+
+ elif pattern:
+
+ if pattern.startswith('!'):
+ # A pattern starting with an exclamation mark ('!') negates the
+ # pattern (exclude instead of include). Escape the exclamation
+ # mark with a back-slash to match a literal exclamation mark
+ # (i.e., '\!').
+ include = False
+ # Remove leading exclamation mark.
+ pattern = pattern[1:]
+ else:
+ include = True
+
+ if pattern.startswith('\\'):
+ # Remove leading back-slash escape for escaped hash ('#') or
+ # exclamation mark ('!').
+ pattern = pattern[1:]
+
+ # Split pattern into segments.
+ pattern_segs = pattern.split('/')
+
+ # Normalize pattern to make processing easier.
+
+ if not pattern_segs[0]:
+ # A pattern beginning with a slash ('/') will only match paths
+ # directly on the root directory instead of any descendant
+ # paths. So, remove empty first segment to make pattern relative
+ # to root.
+ del pattern_segs[0]
+
+ elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
+ # A single pattern without a beginning slash ('/') will match
+ # any descendant path. This is equivalent to "**/{pattern}". So,
+ # prepend with double-asterisks to make pattern relative to
+ # root.
+ # EDGE CASE: This also holds for a single pattern with a
+ # trailing slash (e.g. dir/).
+ if pattern_segs[0] != '**':
+ pattern_segs.insert(0, '**')
+
+ else:
+ # EDGE CASE: A pattern without a beginning slash ('/') but
+ # contains at least one prepended directory (e.g.
+ # "dir/{pattern}") should not match "**/dir/{pattern}",
+ # according to `git check-ignore` (v2.4.1).
+ pass
+
+ if not pattern_segs[-1] and len(pattern_segs) > 1:
+ # A pattern ending with a slash ('/') will match all descendant
+ # paths if it is a directory but not if it is a regular file.
+ # This is equivalent to "{pattern}/**". So, set last segment to
+ # double asterisks to include all descendants.
+ pattern_segs[-1] = '**'
+
+ # Build regular expression from pattern.
+ output = ['^']
+ need_slash = False
+ end = len(pattern_segs) - 1
+ for i, seg in enumerate(pattern_segs):
+ if seg == '**':
+ if i == 0 and i == end:
+ # A pattern consisting solely of double-asterisks ('**')
+ # will match every path.
+ output.append('.+')
+ elif i == 0:
+ # A normalized pattern beginning with double-asterisks
+ # ('**') will match any leading path segments.
+ output.append('(?:.+/)?')
+ need_slash = False
+ elif i == end:
+ # A normalized pattern ending with double-asterisks ('**')
+ # will match any trailing path segments.
+ output.append('/.*')
+ else:
+ # A pattern with inner double-asterisks ('**') will match
+ # multiple (or zero) inner path segments.
+ output.append('(?:/.+)?')
+ need_slash = True
+ elif seg == '*':
+ # Match single path segment.
+ if need_slash:
+ output.append('/')
+ output.append('[^/]+')
+ need_slash = True
+ else:
+ # Match segment glob pattern.
+ if need_slash:
+ output.append('/')
+ output.append(cls._translate_segment_glob(seg))
+ if i == end and include is True:
+ # A pattern ending without a slash ('/') will match a file
+ # or a directory (with paths underneath it). E.g., "foo"
+ # matches "foo", "foo/bar", "foo/bar/baz", etc.
+ # EDGE CASE: However, this does not hold for exclusion cases
+ # according to `git check-ignore` (v2.4.1).
+ output.append('(?:/.*)?')
+ need_slash = True
+ output.append('$')
+ regex = ''.join(output)
+
+ else:
+ # A blank pattern is a null-operation (neither includes nor
+ # excludes files).
+ regex = None
+ include = None
+
+ if regex is not None and return_type is bytes:
+ regex = regex.encode(_BYTES_ENCODING)
+
+ return regex, include
+
+ @staticmethod
+ def _translate_segment_glob(pattern):
+ """
+ Translates the glob pattern to a regular expression. This is used in
+ the constructor to translate a path segment glob pattern to its
+ corresponding regular expression.
+
+ *pattern* (:class:`str`) is the glob pattern.
+
+ Returns the regular expression (:class:`str`).
+ """
+ # NOTE: This is derived from `fnmatch.translate()` and is similar to
+ # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
+
+ escape = False
+ regex = ''
+ i, end = 0, len(pattern)
+ while i < end:
+ # Get next character.
+ char = pattern[i]
+ i += 1
+
+ if escape:
+ # Escape the character.
+ escape = False
+ regex += re.escape(char)
+
+ elif char == '\\':
+ # Escape character, escape next character.
+ escape = True
+
+ elif char == '*':
+ # Multi-character wildcard. Match any string (except slashes),
+ # including an empty string.
+ regex += '[^/]*'
+
+ elif char == '?':
+ # Single-character wildcard. Match any single character (except
+ # a slash).
+ regex += '[^/]'
+
+ elif char == '[':
+ # Bracket expression wildcard. Except for the beginning
+ # exclamation mark, the whole bracket expression can be used
+ # directly as regex but we have to find where the expression
+ # ends.
+ # - "[][!]" matches ']', '[' and '!'.
+ # - "[]-]" matches ']' and '-'.
+ # - "[!]a-]" matches any character except ']', 'a' and '-'.
+ j = i
+ # Pass bracket expression negation.
+ if j < end and pattern[j] == '!':
+ j += 1
+ # Pass first closing bracket if it is at the beginning of the
+ # expression.
+ if j < end and pattern[j] == ']':
+ j += 1
+ # Find closing bracket. Stop once we reach the end or find it.
+ while j < end and pattern[j] != ']':
+ j += 1
+
+ if j < end:
+ # Found end of bracket expression. Increment j to be one past
+ # the closing bracket:
+ #
+ # [...]
+ # ^ ^
+ # i j
+ #
+ j += 1
+ expr = '['
+
+ if pattern[i] == '!':
+ # Bracket expression needs to be negated.
+ expr += '^'
+ i += 1
+ elif pattern[i] == '^':
+ # POSIX declares that the regex bracket expression negation
+ # "[^...]" is undefined in a glob pattern. Python's
+ # `fnmatch.translate()` escapes the caret ('^') as a
+ # literal. To maintain consistency with undefined behavior,
+ # I am escaping the '^' as well.
+ expr += '\\^'
+ i += 1
+
+ # Build regex bracket expression. Escape back-slashes so they are
+ # treated as literal back-slashes by regex as defined by POSIX.
+ expr += pattern[i:j].replace('\\', '\\\\')
+
+ # Add regex bracket expression to regex result.
+ regex += expr
+
+ # Set i to one past the closing bracket.
+ i = j
+
+ else:
+ # Failed to find closing bracket, treat opening bracket as a
+ # bracket literal instead of as an expression.
+ regex += '\\['
+
+ else:
+ # Regular character, escape it for regex.
+ regex += re.escape(char)
+
+ return regex
+
+util.register_pattern('gitwildmatch', GitWildMatchPattern)
+
+
+class GitIgnorePattern(GitWildMatchPattern):
+ """
+ The :class:`GitIgnorePattern` class is deprecated by :class:`GitWildMatchPattern`.
+ This class only exists to maintain compatibility with v0.4.
+ """
+
+ def __init__(self, *args, **kw):
+ """
+ Warn about deprecation.
+ """
+ self._deprecated()
+ return super(GitIgnorePattern, self).__init__(*args, **kw)
+
+ @staticmethod
+ def _deprecated():
+ """
+ Warn about deprecation.
+ """
+ warnings.warn("GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead.", DeprecationWarning, stacklevel=3)
+
+ @classmethod
+ def pattern_to_regex(cls, *args, **kw):
+ """
+ Warn about deprecation.
+ """
+ cls._deprecated()
+ return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)
+
+# Register `GitIgnorePattern` as "gitignore" for backward compatibility
+# with v0.4.
+util.register_pattern('gitignore', GitIgnorePattern)
+
diff --git a/scancode/lib/pathspec.py b/scancode/lib/pathspec.py
new file mode 100644
index 0000000..da08db1
--- /dev/null
+++ b/scancode/lib/pathspec.py
@@ -0,0 +1,146 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides an object oriented interface for pattern matching
+of files.
+"""
+
+import util
+from compat import collection_type, iterkeys, izip_longest, string_types, unicode
+
+
+class PathSpec(object):
+ """
+ The :class:`PathSpec` class is a wrapper around a list of compiled
+ :class:`.Pattern` instances.
+ """
+
+ def __init__(self, patterns):
+ """
+ Initializes the :class:`PathSpec` instance.
+
+ *patterns* (:class:`~collections.abc.Collection` or :class:`~collections.abc.Iterable`)
+ yields each compiled pattern (:class:`.Pattern`).
+ """
+
+ # Keep an existing collection as-is; materialize any other iterable
+ # into a list so it can be iterated more than once.
+ self.patterns = patterns if isinstance(patterns, collection_type) else list(patterns)
+ """
+ *patterns* (:class:`~collections.abc.Collection` of :class:`.Pattern`)
+ contains the compiled patterns.
+ """
+
+ def __eq__(self, other):
+ """
+ Tests the equality of this path-spec with *other* (:class:`PathSpec`)
+ by comparing their :attr:`~PathSpec.patterns` attributes.
+
+ Returns :data:`NotImplemented` when *other* is not a :class:`PathSpec`.
+ """
+ if isinstance(other, PathSpec):
+ # NOTE(review): izip_longest comes from the compat module and is
+ # presumably itertools' zip_longest, which pads the shorter
+ # sequence so specs of different lengths do not compare equal --
+ # confirm against compat.py.
+ paired_patterns = izip_longest(self.patterns, other.patterns)
+ return all(a == b for a, b in paired_patterns)
+ else:
+ return NotImplemented
+
+ def __len__(self):
+ """
+ Returns the number of compiled patterns this path-spec contains
+ (:class:`int`).
+ """
+ return len(self.patterns)
+
+ @classmethod
+ def from_lines(cls, pattern_factory, lines):
+ """
+ Compiles the pattern lines.
+
+ *pattern_factory* can be either the name of a registered pattern
+ factory (:class:`str`), or a :class:`~collections.abc.Callable` used
+ to compile patterns. It must accept an uncompiled pattern (:class:`str`)
+ and return the compiled pattern (:class:`.Pattern`).
+
+ *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled
+ pattern (:class:`str`). This simply has to yield each line so it can
+ be a :class:`file` (e.g., from :func:`open` or :class:`io.StringIO`)
+ or the result from :meth:`str.splitlines`.
+
+ Raises :exc:`TypeError` if *pattern_factory* is not callable, or if
+ *lines* is a single string instead of an iterable of lines.
+
+ Returns the :class:`PathSpec` instance.
+ """
+ if isinstance(pattern_factory, string_types):
+ pattern_factory = util.lookup_pattern(pattern_factory)
+ if not callable(pattern_factory):
+ raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory))
+
+ # A bare string is iterable character by character, which is never
+ # what the caller wants, so reject it explicitly.
+ if isinstance(lines, (bytes, unicode)):
+ raise TypeError("lines:{!r} is not an iterable.".format(lines))
+
+ # Falsy entries (e.g., empty lines) are skipped rather than compiled.
+ lines = [pattern_factory(line) for line in lines if line]
+ return cls(lines)
+
+ def match_file(self, file, separators=None):
+ """
+ Matches the file to this path-spec.
+
+ *file* (:class:`str`) is the file path to be matched against
+ :attr:`self.patterns <PathSpec.patterns>`.
+
+ *separators* (:class:`~collections.abc.Collection` of :class:`str`)
+ optionally contains the path separators to normalize. See
+ :func:`~pathspec.util.normalize_file` for more information.
+
+ Returns :data:`True` if *file* matched; otherwise, :data:`False`.
+ """
+ norm_file = util.normalize_file(file, separators=separators)
+ return util.match_file(self.patterns, norm_file)
+
+ def match_files(self, files, separators=None):
+ """
+ Matches the files to this path-spec.
+
+ *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+ the file paths to be matched against :attr:`self.patterns
+ <PathSpec.patterns>`.
+
+ *separators* (:class:`~collections.abc.Collection` of :class:`str`;
+ or :data:`None`) optionally contains the path separators to
+ normalize. See :func:`~pathspec.util.normalize_file` for more
+ information.
+
+ Raises :exc:`TypeError` if *files* is a single string.
+
+ Returns the matched files (:class:`~collections.abc.Iterable` of
+ :class:`str`).
+ """
+ if isinstance(files, (bytes, unicode)):
+ raise TypeError("files:{!r} is not an iterable.".format(files))
+
+ # file_map maps each normalized path back to the path as supplied by
+ # the caller, so matching runs on normalized paths while the caller
+ # gets back the original spelling.
+ file_map = util.normalize_files(files, separators=separators)
+ matched_files = util.match_files(self.patterns, iterkeys(file_map))
+ for path in matched_files:
+ yield file_map[path]
+
+ def match_tree(self, root, on_error=None, follow_links=None):
+ """
+ Walks the specified root path for all files and matches them to this
+ path-spec.
+
+ *root* (:class:`str`) is the root directory to search for files.
+
+ *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
+ optionally is the error handler for file-system exceptions. See
+ :func:`~pathspec.util.iter_tree` for more information.
+
+
+ *follow_links* (:class:`bool` or :data:`None`) optionally is whether
+ to walk symbolic links that resolve to directories. See
+ :func:`~pathspec.util.iter_tree` for more information.
+
+ Returns the matched files (:class:`~collections.abc.Iterable` of
+ :class:`str`).
+ """
+ files = util.iter_tree(root, on_error=on_error, follow_links=follow_links)
+ return self.match_files(files)
+
diff --git a/scancode/lib/pattern.py b/scancode/lib/pattern.py
new file mode 100644
index 0000000..b297f3c
--- /dev/null
+++ b/scancode/lib/pattern.py
@@ -0,0 +1,155 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides the base definition for patterns.
+"""
+
+import re
+
+from compat import unicode
+
+
+class Pattern(object):
+ """
+ The :class:`Pattern` class is the abstract definition of a pattern.
+
+ Subclasses must override :meth:`match`; the implementation here only
+ raises :exc:`NotImplementedError`.
+ """
+
+ # Make the class dict-less.
+ __slots__ = ('include',)
+
+ def __init__(self, include):
+ """
+ Initializes the :class:`Pattern` instance.
+
+ *include* (:class:`bool` or :data:`None`) is whether the matched
+ files should be included (:data:`True`), excluded (:data:`False`),
+ or is a null-operation (:data:`None`).
+ """
+
+ self.include = include
+ """
+ *include* (:class:`bool` or :data:`None`) is whether the matched
+ files should be included (:data:`True`), excluded (:data:`False`),
+ or is a null-operation (:data:`None`).
+ """
+
+ def match(self, files):
+ """
+ Matches this pattern against the specified files.
+
+ *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+ each file relative to the root directory (e.g., ``"relative/path/to/file"``).
+
+ Returns an :class:`~collections.abc.Iterable` yielding each matched
+ file path (:class:`str`).
+
+ Raises :exc:`NotImplementedError` unless overridden by a subclass.
+ """
+ # The message names the concrete subclass so the missing override is
+ # easy to locate.
+ raise NotImplementedError("{}.{} must override match().".format(self.__class__.__module__, self.__class__.__name__))
+
+
+class RegexPattern(Pattern):
+    """
+    The :class:`RegexPattern` class is an implementation of a pattern
+    using regular expressions.
+    """
+
+    # Make the class dict-less.
+    __slots__ = ('regex',)
+
+    def __init__(self, pattern, include=None):
+        """
+        Initializes the :class:`RegexPattern` instance.
+
+        *pattern* (:class:`unicode`, :class:`bytes`, :class:`re.RegexObject`,
+        or :data:`None`) is the pattern to compile into a regular
+        expression.
+
+        *include* (:class:`bool` or :data:`None`) must be :data:`None`
+        unless *pattern* is a precompiled regular expression (:class:`re.RegexObject`)
+        in which case it is whether matched files should be included
+        (:data:`True`), excluded (:data:`False`), or is a null operation
+        (:data:`None`).
+
+        Raises :exc:`TypeError` if *pattern* is not a string, a compiled
+        regular expression, or :data:`None`.
+
+        .. NOTE:: Subclasses do not need to support the *include*
+           parameter.
+        """
+
+        # *regex* (:class:`re.RegexObject` or :data:`None`) is the regular
+        # expression for the pattern. It stays null for a null-operation.
+        self.regex = None
+
+        # Bind the local up front. The null-pattern branch below assigns
+        # nothing, and without this default the final `self.regex = regex`
+        # raised UnboundLocalError for `RegexPattern(None)`.
+        regex = None
+
+        if isinstance(pattern, (unicode, bytes)):
+            assert include is None, "include:{!r} must be null when pattern:{!r} is a string.".format(include, pattern)
+            regex, include = self.pattern_to_regex(pattern)
+            # NOTE: Make sure to allow a null regular expression to be
+            # returned for a null-operation.
+            if include is not None:
+                regex = re.compile(regex)
+
+        elif pattern is not None and hasattr(pattern, 'match'):
+            # Assume pattern is a precompiled regular expression.
+            # - NOTE: Uses the specified *include*.
+            regex = pattern
+
+        elif pattern is None:
+            # NOTE: Make sure to allow a null pattern to be passed for a
+            # null-operation.
+            assert include is None, "include:{!r} must be null when pattern:{!r} is null.".format(include, pattern)
+
+        else:
+            raise TypeError("pattern:{!r} is not a string, RegexObject, or None.".format(pattern))
+
+        super(RegexPattern, self).__init__(include)
+        self.regex = regex
+
+    def __eq__(self, other):
+        """
+        Tests the equality of this regex pattern with *other* (:class:`RegexPattern`)
+        by comparing their :attr:`~Pattern.include` and :attr:`~RegexPattern.regex`
+        attributes.
+
+        Returns :data:`NotImplemented` when *other* is not a
+        :class:`RegexPattern`.
+        """
+        if isinstance(other, RegexPattern):
+            return self.include == other.include and self.regex == other.regex
+        else:
+            return NotImplemented
+
+    def match(self, files):
+        """
+        Matches this pattern against the specified files.
+
+        *files* (:class:`~collections.abc.Iterable` of :class:`str`)
+        contains each file relative to the root directory (e.g., "relative/path/to/file").
+
+        Returns an :class:`~collections.abc.Iterable` yielding each matched
+        file path (:class:`str`). A null-operation pattern (*include* is
+        :data:`None`) yields nothing.
+        """
+        if self.include is not None:
+            for path in files:
+                if self.regex.match(path) is not None:
+                    yield path
+
+    @classmethod
+    def pattern_to_regex(cls, pattern):
+        """
+        Convert the pattern into an uncompiled regular expression.
+
+        *pattern* (:class:`str`) is the pattern to convert into a regular
+        expression.
+
+        Returns the uncompiled regular expression (:class:`str` or :data:`None`),
+        and whether matched files should be included (:data:`True`),
+        excluded (:data:`False`), or is a null-operation (:data:`None`).
+
+        .. NOTE:: The default implementation simply returns *pattern* and
+           :data:`True`.
+        """
+        return pattern, True
+
diff --git a/scancode/lib/util.py b/scancode/lib/util.py
new file mode 100644
index 0000000..c5bcd7b
--- /dev/null
+++ b/scancode/lib/util.py
@@ -0,0 +1,359 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides utility methods for dealing with path-specs.
+"""
+
+import os
+import os.path
+import posixpath
+import stat
+
+from compat import collection_type, string_types
+
+NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep]
+"""
+*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path
+separators that need to be normalized to the POSIX separator for the
+current operating system. The separators are determined by examining
+:data:`os.sep` and :data:`os.altsep`.
+"""
+
+_registered_patterns = {}
+"""
+*_registered_patterns* (``dict``) maps a name (``str``) to the
+registered pattern factory (``callable``).
+"""
+
+def iter_tree(root, on_error=None, follow_links=None):
+    """
+    Recursively yields every file found beneath a directory.
+
+    *root* (:class:`str`) is the directory at which the walk starts.
+
+    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) is
+    an optional handler for file-system exceptions. It receives the
+    exception (:exc:`OSError`) and may reraise it to abort the walk.
+    With the default of :data:`None` such exceptions are ignored.
+
+    *follow_links* (:class:`bool` or :data:`None`) optionally controls
+    whether symbolic links that resolve to directories are walked. The
+    default of :data:`None` means :data:`True`.
+
+    Raises :exc:`TypeError` if *on_error* is given but is not callable.
+
+    Raises :exc:`RecursionError` if recursion is detected.
+
+    Returns an :class:`~collections.abc.Iterable` yielding the path of
+    each file (:class:`str`) relative to *root*.
+    """
+    if not (on_error is None or callable(on_error)):
+        raise TypeError("on_error:{!r} is not callable.".format(on_error))
+
+    # An unspecified *follow_links* means links are followed.
+    walk_links = True if follow_links is None else follow_links
+
+    root_abs = os.path.abspath(root)
+    for rel_path in _iter_tree_next(root_abs, '', {}, on_error, walk_links):
+        yield rel_path
+
+def _iter_tree_next(root_full, dir_rel, memo, on_error, follow_links):
+ """
+ Scan the directory for all descendant files.
+
+ *root_full* (:class:`str`) the absolute path to the root directory.
+
+ *dir_rel* (:class:`str`) the path to the directory to scan relative to
+ *root_full*.
+
+ *memo* (:class:`dict`) keeps track of ancestor directories
+ encountered. Maps each ancestor real path (:class:`str`) to relative
+ path (:class:`str`).
+
+ *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
+ optionally is the error handler for file-system exceptions.
+
+ *follow_links* (:class:`bool`) is whether to walk symbolic links that
+ resolve to directories.
+
+ Raises :exc:`RecursionError` if the real path of the directory is
+ already present in *memo*.
+
+ Yields the path of each regular file (:class:`str`) relative to
+ *root_full*.
+ """
+ dir_full = os.path.join(root_full, dir_rel)
+ dir_real = os.path.realpath(dir_full)
+
+ # Remember each encountered ancestor directory and its canonical
+ # (real) path. If a canonical path is encountered more than once,
+ # recursion has occurred.
+ if dir_real not in memo:
+ memo[dir_real] = dir_rel
+ else:
+ raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
+
+ # NOTE: An error raised by os.listdir() itself is not passed to
+ # *on_error*; only the per-node lstat()/stat() failures below are.
+ for node in os.listdir(dir_full):
+ node_rel = os.path.join(dir_rel, node)
+ node_full = os.path.join(root_full, node_rel)
+
+ # Inspect child node.
+ try:
+ node_stat = os.lstat(node_full)
+ except OSError as e:
+ if on_error is not None:
+ on_error(e)
+ continue
+
+ if stat.S_ISLNK(node_stat.st_mode):
+ # Child node is a link, inspect the target node.
+ is_link = True
+ try:
+ node_stat = os.stat(node_full)
+ except OSError as e:
+ if on_error is not None:
+ on_error(e)
+ continue
+ else:
+ is_link = False
+
+ if stat.S_ISDIR(node_stat.st_mode) and (follow_links or not is_link):
+ # Child node is a directory, recurse into it and yield its
+ # descendant files.
+ for file_rel in _iter_tree_next(root_full, node_rel, memo, on_error, follow_links):
+ yield file_rel
+
+ elif stat.S_ISREG(node_stat.st_mode):
+ # Child node is a file, yield it.
+ yield node_rel
+
+ # NOTE: Make sure to remove the canonical (real) path of the directory
+ # from the ancestors memo once we are done with it. This allows the
+ # same directory to appear multiple times. If this is not done, the
+ # second occurrence of the directory will be incorrectly interpreted as
+ # a recursion. See <https://github.com/cpburnz/python-path-specification/pull/7>.
+ del memo[dir_real]
+
+def lookup_pattern(name):
+ """
+ Looks up a registered pattern factory by name.
+
+ *name* (:class:`str`) is the name of the pattern factory.
+
+ Returns the registered pattern factory (:class:`~collections.abc.Callable`).
+ If no pattern factory is registered under *name*, raises :exc:`KeyError`.
+ """
+ return _registered_patterns[name]
+
+def match_file(patterns, file):
+    """
+    Decides whether a single file is matched by the patterns.
+
+    *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
+    contains the patterns to apply, in order.
+
+    *file* (:class:`str`) is the normalized file path to test against
+    *patterns*.
+
+    Patterns whose *include* is :data:`None` are skipped. The last
+    pattern that matches *file* decides the outcome.
+
+    Returns :data:`True` if *file* matched; otherwise, :data:`False`.
+    """
+    result = False
+    for spec in patterns:
+        if spec.include is None:
+            # Null-operation pattern, nothing to evaluate.
+            continue
+        if file in spec.match((file,)):
+            result = spec.include
+    return result
+
+def match_files(patterns, files):
+    """
+    Selects the files that are matched by the patterns.
+
+    *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
+    contains the patterns to apply, in order.
+
+    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+    the normalized file paths to test against *patterns*.
+
+    Patterns whose *include* is :data:`None` are skipped. An including
+    pattern adds its matches to the result and an excluding pattern
+    removes its matches from it.
+
+    Returns the matched files (:class:`set` of :class:`str`).
+    """
+    # Every pattern scans the full candidate list, so a one-shot iterable
+    # has to be materialized first.
+    candidates = files if isinstance(files, collection_type) else list(files)
+
+    selected = set()
+    for spec in patterns:
+        if spec.include is None:
+            continue
+        hits = spec.match(candidates)
+        if spec.include:
+            selected.update(hits)
+        else:
+            selected.difference_update(hits)
+    return selected
+
+def normalize_file(file, separators=None):
+    """
+    Converts a file path to use the POSIX path separator (i.e., ``'/'``).
+
+    *file* (:class:`str`) is the file path.
+
+    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
+    :data:`None`) optionally contains the path separators to replace.
+    The POSIX path separator (``'/'``) need not be listed, and listing
+    it does not change the result. The default of :data:`None` selects
+    :data:`NORMALIZE_PATH_SEPS`. Pass an empty container (e.g., an empty
+    tuple ``()``) to disable separator normalization.
+
+    Returns the normalized file path (:class:`str`).
+    """
+    seps = NORMALIZE_PATH_SEPS if separators is None else separators
+
+    # Rewrite each foreign separator to the POSIX one.
+    normalized = file
+    for sep in seps:
+        normalized = normalized.replace(sep, posixpath.sep)
+
+    # Drop a leading current-directory prefix.
+    if normalized.startswith('./'):
+        return normalized[2:]
+    return normalized
+
+def normalize_files(files, separators=None):
+    """
+    Converts the file paths to use the POSIX path separator.
+
+    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+    the file paths to be normalized.
+
+    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
+    :data:`None`) optionally contains the path separators to normalize.
+    See :func:`normalize_file` for more information.
+
+    Returns a :class:`dict` mapping each normalized file path (:class:`str`)
+    to the original file path (:class:`str`). When several originals
+    normalize to the same path, the last one wins.
+    """
+    return {
+        normalize_file(original, separators=separators): original
+        for original in files
+    }
+
+def register_pattern(name, pattern_factory, override=None):
+    """
+    Adds a pattern factory to the registry.
+
+    *name* (:class:`str`) is the name under which the pattern factory is
+    registered.
+
+    *pattern_factory* (:class:`~collections.abc.Callable`) is used to
+    compile patterns. It must accept an uncompiled pattern (:class:`str`)
+    and return the compiled pattern (:class:`.Pattern`).
+
+    *override* (:class:`bool` or :data:`None`) optionally is whether a
+    pattern already registered under the same name may be replaced
+    (:data:`True`) instead of raising an :exc:`AlreadyRegisteredError`
+    (:data:`False`). Default is :data:`None` for :data:`False`.
+
+    Raises :exc:`TypeError` if *name* is not a string or *pattern_factory*
+    is not callable.
+    """
+    if not isinstance(name, string_types):
+        raise TypeError("name:{!r} is not a string.".format(name))
+    if not callable(pattern_factory):
+        raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory))
+
+    # A name clash is only an error when the caller did not ask to
+    # override the existing registration.
+    if name in _registered_patterns:
+        if not override:
+            raise AlreadyRegisteredError(name, _registered_patterns[name])
+
+    _registered_patterns[name] = pattern_factory
+
+
+class AlreadyRegisteredError(Exception):
+    """
+    The :exc:`AlreadyRegisteredError` exception signals an attempt to
+    register a pattern factory under a name that is already taken.
+    """
+
+    def __init__(self, name, pattern_factory):
+        """
+        Initializes the :exc:`AlreadyRegisteredError` instance.
+
+        *name* (:class:`str`) is the name of the registered pattern.
+
+        *pattern_factory* (:class:`~collections.abc.Callable`) is the
+        registered pattern factory.
+        """
+        # Both values are stored in ``self.args``; the properties below
+        # read them back by position.
+        super(AlreadyRegisteredError, self).__init__(name, pattern_factory)
+
+    @property
+    def name(self):
+        """
+        *name* (:class:`str`) is the name of the registered pattern.
+        """
+        return self.args[0]
+
+    @property
+    def pattern_factory(self):
+        """
+        *pattern_factory* (:class:`~collections.abc.Callable`) is the
+        registered pattern factory.
+        """
+        return self.args[1]
+
+    @property
+    def message(self):
+        """
+        *message* (:class:`str`) is the error message.
+        """
+        template = "{name!r} is already registered for pattern factory:{pattern_factory!r}."
+        return template.format(name=self.name, pattern_factory=self.pattern_factory)
+
+
+class RecursionError(Exception):
+    """
+    The :exc:`RecursionError` exception signals that recursion was
+    detected.
+    """
+
+    def __init__(self, real_path, first_path, second_path):
+        """
+        Initializes the :exc:`RecursionError` instance.
+
+        *real_path* (:class:`str`) is the real path that recursion was
+        encountered on.
+
+        *first_path* (:class:`str`) is the first path encountered for
+        *real_path*.
+
+        *second_path* (:class:`str`) is the second path encountered for
+        *real_path*.
+        """
+        # The three paths are stored in ``self.args`` in this order; the
+        # properties below read them back by position.
+        super(RecursionError, self).__init__(real_path, first_path, second_path)
+
+    @property
+    def real_path(self):
+        """
+        *real_path* (:class:`str`) is the real path that recursion was
+        encountered on.
+        """
+        return self.args[0]
+
+    @property
+    def first_path(self):
+        """
+        *first_path* (:class:`str`) is the first path encountered for
+        :attr:`self.real_path <RecursionError.real_path>`.
+        """
+        return self.args[1]
+
+    @property
+    def second_path(self):
+        """
+        *second_path* (:class:`str`) is the second path encountered for
+        :attr:`self.real_path <RecursionError.real_path>`.
+        """
+        return self.args[2]
+
+    @property
+    def message(self):
+        """
+        *message* (:class:`str`) is the error message.
+        """
+        template = "Real path {real!r} was encountered at {first!r} and then {second!r}."
+        return template.format(
+            real=self.real_path,
+            first=self.first_path,
+            second=self.second_path,
+        )
+
diff --git a/scancode/scanCode.py b/scancode/scanCode.py
index 029bfad..06d7dd3 100755
--- a/scancode/scanCode.py
+++ b/scancode/scanCode.py
@@ -40,6 +40,11 @@ import re
import sys
import textwrap
+# import pathspec from local lib path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + '/lib')
+import pathspec
+from gitwildmatch import GitWildMatchPattern
+
VERBOSE = False
# Terminal colors
@@ -65,6 +70,7 @@ ERR_TABS = "line contains tabs."
ERR_TRAILING_WHITESPACE = "line has trailing whitespace."
HELP_CONFIG_FILE = "provide custom configuration file"
+HELP_GITIGNORE_FILE = "provide .gitignore file for additional path exclusions"
HELP_DISPLAY_EXCLUSIONS = "display path exclusion information"
HELP_ROOT_DIR = "starting directory for the scan"
HELP_VERBOSE = "enable verbose output"
@@ -74,6 +80,7 @@ MSG_CHECKS_PASSED = "All checks passed."
MSG_CONFIG_ADDING_LICENSE_FILE = "Adding valid license from: [%s], value:\n%s"
MSG_ERROR_SUMMARY = "Scan detected %d error(s) in %d file(s):"
MSG_READING_CONFIGURATION = "Reading configuration file [%s]..."
+MSG_READING_GITIGNORE = "Reading gitignore file [%s]..."
MSG_READING_LICENSE_FILE = "Reading license file [%s]..."
MSG_RUNNING_FILE_CHECKS = " Running File Check [%s]"
MSG_RUNNING_LINE_CHECKS = " Running Line Check [%s]"
@@ -81,10 +88,10 @@ MSG_SCANNING_FILTER = "Scanning files with filter: [%s]:"
MSG_SCANNING_STARTED = "Scanning files starting at [%s]..."
WARN_CONFIG_SECTION_NOT_FOUND = "Configuration file section [%s] not found."
-WARN_SCAN_EXCLUDED_PATH_SUMMARY = "Scan excluded (%s) directories:"
+WARN_SCAN_EXCLUDED_PATH_SUMMARY = "Scan excluded (%s) patterns:"
WARN_SCAN_EXCLUDED_FILE_SUMMARY = "Scan excluded (%s) files:"
WARN_SCAN_EXCLUDED_FILE = " Excluded file: %s"
-WARN_SCAN_EXCLUDED_PATH = " Excluded path: %s"
+WARN_SCAN_EXCLUDED_PATH = " Excluded pattern: %s"
MSG_DESCRIPTION = "Scans all source code under specified directory for " \
"project compliance using provided configuration."
@@ -211,18 +218,21 @@ def read_license_files(config):
raise Exception(ERR_REQUIRED_SECTION % SECTION_LICENSE)
-def read_path_exclusions(config):
+def read_path_exclusions(config, gitignore_file):
"""Read the list of paths to exclude from the scan."""
path_dict = get_config_section_dict(config, SECTION_EXCLUDE)
# vprint("path_dict: " + str(path_dict))
if path_dict is not None:
# each 'key' is an exclusion path
for key in path_dict:
+ key = str.strip(key)
if key is not None:
exclusion_paths.append(key)
- else:
- raise Exception(ERR_REQUIRED_SECTION % SECTION_LICENSE)
+ if gitignore_file is not None:
+ print_highlight(MSG_READING_GITIGNORE % gitignore_file.name)
+ for line in gitignore_file.read().splitlines():
+ exclusion_paths.append(line)
def read_scan_options(config):
"""Read the Options from the configuration file."""
@@ -251,7 +261,7 @@ def read_regex(config):
raise Exception(ERR_REQUIRED_SECTION % SECTION_REGEX)
-def read_config_file(file):
+def read_config_file(file, gitignore_file):
"""Read in and validate configuration file."""
try:
print_highlight(MSG_READING_CONFIGURATION % file.name)
@@ -263,7 +273,7 @@ def read_config_file(file):
config.readfp(file)
read_license_files(config)
read_path_inclusions(config)
- read_path_exclusions(config)
+ read_path_exclusions(config, gitignore_file)
read_scan_options(config)
read_regex(config)
except Exception as e:
@@ -399,32 +409,20 @@ def run_line_checks(file_path, checks):
errors.append((line_number, err))
return errors
-
def all_paths(root_dir):
"""Generator that returns files with known extensions that can be scanned.
Iteration is recursive beginning at the passed root directory and
skipping directories that are listed as exception paths.
"""
- # For every file in every directory (path) starting at "root_dir"
+ spec = pathspec.PathSpec.from_lines(GitWildMatchPattern, exclusion_paths)
+ exclusion_files_set = set(map(lambda f: os.path.join(root_dir, f), spec.match_tree(root_dir)))
+
for dir_path, dir_names, files in os.walk(root_dir):
for f in files:
filename = os.path.join(dir_path, f)
-
- # Map will contain a boolean for each exclusion path tested
- # as input to the lambda function.
- # only if all() values in the Map are "True" (meaning the file is
- # not excluded) then it should yield the filename to run checks on.
- # not dir_path.endswith(p) and
- if all(map(lambda p: p not in dir_path, exclusion_paths)):
- # directory not excluded, now check for any file exclusions
- if all(map(lambda p: p not in filename, exclusion_paths)):
- yield filename
- else:
- exclusion_files_set.add(filename)
- else:
- # directory is excluded
- exclusion_files_set.add(filename)
+ if filename not in exclusion_files_set:
+ yield filename
def colors():
"""Create a collection of helper functions to colorize strings."""
@@ -488,6 +486,11 @@ if __name__ == "__main__":
dest="config",
default=DEFAULT_CONFIG_FILE,
help=HELP_CONFIG_FILE)
+ parser.add_argument("--gitignore",
+ type=argparse.FileType('r'),
+ action="store",
+ dest="gitignore",
+ help=HELP_GITIGNORE_FILE)
parser.add_argument("root_directory",
type=str,
default=DEFAULT_ROOT_DIR,
@@ -500,6 +503,7 @@ if __name__ == "__main__":
# Config file at this point is an actual file object
config_file = args.config
+ gitignore_file = args.gitignore
# Assign supported scan functions to either file or line globals
# These checks run once per-file
@@ -517,7 +521,7 @@ if __name__ == "__main__":
})
# Read / load configuration file from file (pointer)
- if read_config_file(config_file) == -1:
+ if read_config_file(config_file, gitignore_file) == -1:
exit(1)
# Verify starting path parameter is valid
@@ -532,11 +536,12 @@ if __name__ == "__main__":
# Runs all listed checks on all relevant files.
all_errors = []
+ paths_to_check = set(all_paths(root_dir))
for fltr, chks1, chks2 in FILTERS_WITH_CHECK_FUNCTIONS:
# print_error(col.cyan(MSG_SCANNING_FILTER % fltr))
# print_error("chks1=" + str(chks1))
# print_error("chks2=" + str(chks2))
- for path in fnmatch.filter(all_paths(root_dir), fltr):
+ for path in fnmatch.filter(paths_to_check, fltr):
errors = run_file_checks(path, chks1)
errors += run_line_checks(path, chks2)
all_errors += map(lambda p: (path, p[0], p[1]), errors)
diff --git a/scancode/travis.cfg b/scancode/travis.cfg
index 9c10889..8f90563 100644
--- a/scancode/travis.cfg
+++ b/scancode/travis.cfg
@@ -52,6 +52,9 @@ ASFMinifiedLicenseHeaderREM.txt
# Scancode unit tests
tests/exclude
+# Pathspec library
+lib/
+
[Options]
# Not all code files allow licenses to appear starting at the first character
# of the file. This option tells the scan to allow licenses to appear starting