You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jb...@apache.org on 2016/09/14 14:45:50 UTC

[3/3] incubator-impala git commit: IMPALA-4110: Apache RAT script on Impala tarballs.

IMPALA-4110: Apache RAT script on Impala tarballs.

Apache RAT is a tool for license auditing. It will be used as part of
the Apache release process. This patch includes a script for parsing
its output and a file containing a list of filename globs that should
be ignored. The script takes two command line parameters as input -
the filename of the ignored file globs, and the filename of the RAT
xml output.

Change-Id: Ic95bd38fbb90f9a901602dd91cee541b16bf4714
Reviewed-on: http://gerrit.cloudera.org:8080/4405
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Jim Apple <jb...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/aa28e37e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/aa28e37e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/aa28e37e

Branch: refs/heads/master
Commit: aa28e37eb7df9a53dbcf7aabe910095681dd3a51
Parents: f4bbd41
Author: Jim Apple <jb...@cloudera.com>
Authored: Tue Sep 13 14:47:57 2016 -0700
Committer: Jim Apple <jb...@cloudera.com>
Committed: Wed Sep 14 14:42:23 2016 +0000

----------------------------------------------------------------------
 bin/check-rat-report.py   |  80 +++++++++++++++++++++++++++++
 bin/rat_exclude_files.txt | 111 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aa28e37e/bin/check-rat-report.py
----------------------------------------------------------------------
diff --git a/bin/check-rat-report.py b/bin/check-rat-report.py
new file mode 100755
index 0000000..2181015
--- /dev/null
+++ b/bin/check-rat-report.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Apache RAT is a tool for checking license compliance. This is a script that uses Apache
+# RAT to check licenses in Impala.
+#
+# It takes as command line parameters two file names - the first is the name of a file
+# containing globs of files to ignore, and the second is the XML output of RAT.
+#
+# I tested this with
+#
+#    pushd "${IMPALA_HOME}"
+#    export SANDBOX=$(mktemp -d) # Just a place to put files for testing
+#    echo "${SANDBOX}"
+#    git archive -o "${SANDBOX}/test-impala.tar.gz" HEAD # Make the tarball to check
+#    java -jar ~/Downloads/apache-rat-0.12/apache-rat-0.12.jar -x \
+#        "${SANDBOX}/test-impala.tar.gz" >"${SANDBOX}/rat.xml"
+#    bin/check-rat-report.py bin/rat_exclude_files.txt "${SANDBOX}/rat.xml"
+#
+# This is copied from a similar file in Apache Kudu. Only RAT 0.12 is supported at this
+# time.
+
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+  sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % (sys.argv[0],))
+  sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+# Load the globs, skipping blank lines and "# " comments; "with" closes the file.
+with open(exclude_globs_filename, "r") as exclude_globs_file:
+  globs = [line.strip() for line in exclude_globs_file
+           if line.strip() and "# " != line[0:2]]
+
+root = ET.parse(xml_filename).getroot()
+all_ok = True
+
+# Report each resource that is not approved, not excluded and not a testdata binary.
+for r in root.findall('resource'):
+  approvals = r.findall('license-approval')
+  if approvals and approvals[0].attrib['name'] == 'true':
+    continue
+  # Drop the first path component of the resource name before glob matching.
+  clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+  if any(fnmatch.fnmatch(clean_name, g) for g in globs):
+    continue
+  typename = r.findall('type')[0].attrib['name']
+  if clean_name.startswith('testdata/') and typename in ['archive', 'binary']:
+    continue
+  sys.stderr.write(
+      "%s: %s\n" %
+      ('UNAPPROVED' if approvals else "NO APPROVALS; " + typename, clean_name))
+  all_ok = False
+
+if not all_ok:
+  sys.exit(1)
+
+print('OK')
+sys.exit(0)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aa28e37e/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
new file mode 100644
index 0000000..d3ee414
--- /dev/null
+++ b/bin/rat_exclude_files.txt
@@ -0,0 +1,111 @@
+# These are the globs that RAT ignores when doing a copyright
+# audit. Comments start with "# ".
+
+# http://www.apache.org/legal/src-headers.html: "A file without any
+# degree of creativity in either its literal elements or its structure
+# is not protected by copyright law; therefore, such a file does not
+# require a license header."
+.clang-format
+.gitignore
+*/.gitignore
+*/rat_exclude_files.txt
+be/src/testutil/htpasswd
+be/src/testutil/*.key
+tests/*/__init__.py
+testdata/common/__init__.py
+fe/src/test/resources/regionservers
+shell/__init__.py
+ssh_keys/id_rsa_impala
+testdata/__init__.py
+tests/__init__.py
+www/index.html
+
+# See LICENSE.txt
+be/src/gutil/*
+www/highlight/*
+www/DataTables*/*
+www/datatables.*
+www/bootstrap/*
+tests/comparison/leopard/static/css/bootstrap*
+tests/comparison/leopard/static/js/bootstrap*
+shell/ext-py/prettytable-0.7.1/*
+shell/ext-py/sqlparse-0.1.14/*
+shell/ext-py/sasl-0.1.1/*
+www/d3.v3.min.js
+www/jquery/jquery-1.12.4.min.js
+
+# http://www.apache.org/legal/src-headers.html: "Short informational text files; for
+# example README, INSTALL files. The expectation is that these files make it obvious which
+# product they relate to."
+be/src/testutil/certificates-info.txt
+bin/README-RUNNING-BENCHMARKS
+LOGS.md
+README.md
+*/README
+*/README.dox
+testdata/bin/README-BENCHMARK-TEST-GENERATION
+tests/comparison/ORACLE.txt
+
+# http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
+# source header would cause the tests to fail."
+testdata/*.csv
+testdata/*.test
+be/src/testutil/*.pem
+*.json
+fe/src/test/resources/*.xml
+fe/src/test/resources/hbase-jaas-client.conf.template
+fe/src/test/resources/hbase-jaas-server.conf.template
+llvm-ir/test-loop.bc
+testdata/AllTypesError/*.txt
+testdata/AllTypesErrorNoNulls/*.txt
+*.avsc
+*.parq
+*.parquet
+testdata/cluster/node_templates/cdh5/etc/hadoop/conf/*.xml.tmpl
+testdata/cluster/node_templates/cdh5/etc/kudu/*.conf.tmpl
+testdata/cluster/node_templates/common/etc/hadoop/conf/*.xml.tmpl
+testdata/data/chars-formats.txt
+testdata/data/chars-tiny.txt
+testdata/data/decimal-tiny.txt
+testdata/data/decimal_tbl.txt
+testdata/data/overflow.txt
+testdata/data/text-comma-backslash-newline.txt
+testdata/data/text-dollar-hash-pipe.txt
+testdata/data/widerow.txt
+testdata/data/local_tbl/00000.txt
+testdata/datasets/functional/functional_schema_template.sql
+testdata/hive_benchmark/grepTiny/part-00000
+tests/pytest.ini
+tests/shell/bad_impalarc
+tests/shell/good_impalarc
+tests/shell/shell.cmds
+tests/shell/shell2.cmds
+tests/shell/shell_error.cmds
+tests/shell/test_close_queries.sql
+tests/shell/test_file_comments.sql
+tests/shell/test_file_no_comments.sql
+tests/shell/test_var_substitution.sql
+
+
+# Generated by Apache-licensed software:
+be/src/transport/config.h
+
+# BSD 3-clause license that RAT can't seem to identify:
+cmake_modules/FindJNI.cmake
+
+# http://www.apache.org/legal/resolved.html#category-a : Python Software Foundation
+# License is allowed.
+shell/pkg_resources.py
+
+# Notices in Impala as required by ASF rules:
+DISCLAIMER
+LICENSE.txt
+NOTICE.txt
+
+# Notices in thirdparty sources included in the Impala repo and called out in /LICENSE.txt
+be/src/thirdparty/squeasel/LICENSE
+
+# http://www.apache.org/legal/src-headers.html: 'Snippet' files that are combined to form
+# a larger file where the larger file would have duplicate licensing headers.
+www/all_child_groups.tmpl
+www/common-footer.tmpl