You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by am...@apache.org on 2020/09/21 18:11:40 UTC
[beam] branch master updated: [BEAM-9136] Add python dependency
license CSV for license URL and type
This is an automated email from the ASF dual-hosted git repository.
amyrvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 8fe5805 [BEAM-9136] Add python dependency license CSV for license URL and type
new 9d2fce7 Merge pull request #12879 from alanmyrvold/py-license-list
8fe5805 is described below
commit 8fe5805454d957d40127efee5236461712642dc3
Author: Alan Myrvold <am...@google.com>
AuthorDate: Fri Sep 18 23:54:44 2020 +0000
[BEAM-9136] Add python dependency license CSV for license URL and type
---
sdks/python/container/Dockerfile | 6 ++-
.../container/license_scripts/pull_licenses_py.py | 50 ++++++++++++++++++++--
2 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
index a7c498b..9d1d3a8 100644
--- a/sdks/python/container/Dockerfile
+++ b/sdks/python/container/Dockerfile
@@ -45,7 +45,8 @@ RUN \
# Check that the fast implementation of protobuf is used.
python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'cpp'; print ('Verified fast protobuf used.')" && \
# Remove pip cache.
- rm -rf /root/.cache/pip
+ rm -rf /root/.cache/pip && \
+ rm -rf /tmp/base_image_requirements.txt
# Configure ccache prior to installing the SDK.
RUN ln -s /usr/bin/ccache /usr/local/bin/gcc
@@ -76,6 +77,9 @@ RUN if [ "$pull_licenses" = "true" ] ; then \
rm -rf /opt/apache/beam/third_party_licenses ; \
fi
+# Remove license scripts
+RUN rm -rf /tmp/license_scripts
+
# Log complete list of what exact packages and versions are installed.
RUN pip freeze --all
# Make sure there are no conflicting dependencies.
diff --git a/sdks/python/container/license_scripts/pull_licenses_py.py b/sdks/python/container/license_scripts/pull_licenses_py.py
index c987015..75f629d 100644
--- a/sdks/python/container/license_scripts/pull_licenses_py.py
+++ b/sdks/python/container/license_scripts/pull_licenses_py.py
@@ -19,6 +19,7 @@
A script to pull licenses for Python.
The script is executed within Docker.
"""
+import csv
import json
import logging
import os
@@ -30,6 +31,9 @@ import traceback
import yaml
from future.moves.urllib.request import urlopen
+from future.moves.urllib.parse import urlparse
+from future.moves.urllib.parse import urljoin
+
from tenacity import retry
from tenacity import stop_after_attempt
from tenacity import wait_exponential
@@ -42,7 +46,7 @@ def run_bash_command(command):
def run_pip_licenses():
- command = 'pip-licenses --with-license-file --format=json'
+ command = 'pip-licenses --with-license-file --with-urls --from=mixed --ignore apache-beam --format=json'
dependencies = run_bash_command(command)
return json.loads(dependencies)
@@ -53,7 +57,7 @@ def copy_license_files(dep):
if source_license_file.lower() == 'unknown':
return False
name = dep['Name'].lower()
- dest_dir = '/'.join([LICENSE_DIR, name])
+ dest_dir = os.path.join(LICENSE_DIR, name)
try:
os.mkdir(dest_dir)
shutil.copy(source_license_file, dest_dir + '/LICENSE')
@@ -84,7 +88,7 @@ def pull_from_url(dep, configs):
'''
if dep in configs:
config = configs[dep]
- dest_dir = '/'.join([LICENSE_DIR, dep])
+ dest_dir = os.path.join(LICENSE_DIR, dep)
cur_temp_dir = tempfile.mkdtemp()
try:
@@ -116,6 +120,43 @@ def pull_from_url(dep, configs):
shutil.rmtree(cur_temp_dir)
def license_url(name, project_url, dep_config):
  '''
  Gets the license URL for a dependency, either from the parsed yaml or,
  if the project is hosted on github, by looking for a license file in the
  repo.

  Args:
    name: package name; it is lower-cased before the lookup in the config.
    project_url: project URL reported for the package.
    dep_config: parsed yaml config; its 'pip_dependencies' mapping is
      consulted first and, when it has an entry for the package, that
      entry's 'license' value is returned as is.

  Returns:
    The configured license URL, or the raw.githubusercontent.com URL of the
    first license file found on the master branch, or project_url when
    neither is available.
  '''
  configs = dep_config['pip_dependencies']
  key = name.lower()
  if key in configs:
    return configs[key]['license']

  parsed = urlparse(project_url)
  if parsed.netloc != "github.com":
    return project_url

  raw = "https://raw.githubusercontent.com"
  path = parsed.path
  if not path.endswith("/"):
    # urljoin would otherwise replace the last path segment (the repo name).
    path = path + "/"

  candidates = (
      "LICENSE", "LICENSE.txt", "LICENSE.md", "LICENSE.rst", "COPYING")
  for license_file in candidates:
    url = raw + urljoin(path, "master/" + license_file)
    try:
      with urlopen(url) as response:
        if response.getcode() == 200:
          return url
    except Exception:  # best effort: any failure means "try the next name"
      # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
      # propagate so the script can still be interrupted.
      logging.debug('No license file found at %s', url)
  return project_url
+
+
def save_license_list(csv_filename, dependencies, dep_config):
  '''
  Save the names, URLs, and license type for python dependency licenses in a
  CSV file.

  Args:
    csv_filename: path of the CSV file to write; it is opened in 'w' mode,
      so any existing content is replaced.
    dependencies: iterable of dicts; the 'Name', 'URL' and 'License' keys of
      each one are read.
    dep_config: parsed yaml config, passed through to license_url().

  One row is written per dependency: name, license URL, license type.
  '''
  # newline='' is what the csv module expects for files handed to a writer
  # (it emits its own line terminators); the explicit encoding keeps
  # non-ASCII license strings from failing under a C/POSIX locale.
  with open(csv_filename, mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    for dep in dependencies:
      url = license_url(dep['Name'], dep['URL'], dep_config)
      writer.writerow([dep['Name'], url, dep['License']])
+
+
if __name__ == "__main__":
no_licenses = []
logging.getLogger().setLevel(logging.INFO)
@@ -124,6 +165,9 @@ if __name__ == "__main__":
dep_config = yaml.full_load(file)
dependencies = run_pip_licenses()
+ csv_filename = os.path.join(LICENSE_DIR, 'python-licenses.csv')
+ save_license_list(csv_filename, dependencies, dep_config)
+
# add licenses for pip installed packages.
# try to pull licenses with pip-licenses tool first, if no license pulled,
# then pull from URLs.