You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by fc...@apache.org on 2011/07/13 14:41:29 UTC
svn commit: r1145980 - /incubator/stanbol/trunk/conventions/insert_license.pl
Author: fchrist
Date: Wed Jul 13 12:41:29 2011
New Revision: 1145980
URL: http://svn.apache.org/viewvc?rev=1145980&view=rev
Log:
STANBOL-271 added modified insert_license.pl script. This version has support for Freemarker *.ftl files
Added:
incubator/stanbol/trunk/conventions/insert_license.pl (with props)
Added: incubator/stanbol/trunk/conventions/insert_license.pl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/conventions/insert_license.pl?rev=1145980&view=auto
==============================================================================
--- incubator/stanbol/trunk/conventions/insert_license.pl (added)
+++ incubator/stanbol/trunk/conventions/insert_license.pl Wed Jul 13 12:41:29 2011
@@ -0,0 +1,541 @@
+#!/usr/bin/perl -w
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#=========================================================
+#
+# For files that do not yet have an Apache License, insert the 2.0 license.
+# Adds comment markers for the relevant file type.
+#
+# This can also be used to provide a summary of the current situation.
+# It will detect the presence of various different license headers.
+# Use the -p option for practice mode.
+#
+# Limitations:
+# - Only developed and tested for certain file types. Others will be
+# reported and skipped.
+# Needs tweaks for other types (see "configuration" section below).
+# - Only inserts missing licenses and detects and reports other license types.
+# See ./update-AL20.pl to update to the current license style.
+#
+# Caveats:
+# - As usual, make a backup of your tree first or be prepared to 'svn revert -R'
+# your working copy if the script stuffs up.
+#
+# WARNING: Be sure to look at the output of this script for warnings.
+# WARNING: Be sure to do the normal 'svn diff' and review.
+# Attend to the warning in tools/copy2license.pl about "collective copyright".
+#
+# Developed only for UNIX, YMMV.
+#
+# Procedure:
+# See ./relicense.txt for an example procedure.
+# Use -p for practice mode.
+# Run the script. It will descend the directory tree.
+# Run with no parameters or -h to show usage.
+#
+#=========================================================
+
+use strict;
+use vars qw($opt_h $opt_p);
+use Getopt::Std;
+use File::Basename;
+use File::Find;
+
+#--------------------------------------------------
+# ensure proper usage
+# Parse the command line.
+#   -h  show the usage message
+#   -p  practice run (nothing is written)
+# The first positional argument is the start directory, the optional
+# second one is the avoid-list file.
+# Asking for help (or giving no arguments at all, which is the documented
+# way to see the usage) exits with status 0; genuine usage errors exit
+# with status 1 so that calling scripts can detect them.
+if (!getopts("hp")) {
+  # getopts has already warned about the unknown option
+  ShowUsage();
+  exit 1;
+}
+if ((scalar @ARGV < 1) || defined($opt_h)) {
+  ShowUsage();
+  exit;
+}
+
+my $startDir = shift;
+my $avoidList = shift;
+if (!-e $startDir) {
+  print STDERR qq!
+The start directory '$startDir' does not exist.
+!;
+  ShowUsage();
+  exit 1;
+}
+if (defined($avoidList) && !-e $avoidList) {
+  print STDERR qq!
+The list of files to avoid '$avoidList' does not exist.
+!;
+  ShowUsage();
+  exit 1;
+}
+if ($opt_p) { print STDERR "\nDoing practice run. No files will be written\n"; }
+
+# Legend for the license type codes that appear in the log output.
+print qq!
+AL-20 = Apache License 2.0 with original Copyright line.
+AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
+AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
+----------------------
+
+!;
+
+#--------------------------------------------------
+# do some configuration
+# The license text to insert, without any comment markers.
+# It is split into individual lines (@license) so that the comment leader
+# of the target file type can be prepended to each line when the header
+# is written.
+my $license = qq!Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+!;
+my @license = split(/\n/, $license);
+
+# build a hash of filename extensions to be processed
+# together with the particular style of comment marker to use.
+my @xmlFileTypes = qw(
+  .xml .xsl .xslt .xmap .xcat
+  .xconf .xroles .roles .xsp .rss
+  .xinfo .xprofile .xsamples .xtest .xweb .xwelcome
+  .samplesxconf .samplesxpipe .svg .xhtml .xhtml2 .gt .jx .jmx
+  .jdo .orm .jdoquery .jelly
+  .jxt .meta .pagesheet .stx .xegrm .xgrm .xlex .xmi
+  .xsd .rng .rdf .rdfs .xul .tld .xxe .ft .fv
+  .wsdd .wsdl .xlog .pom
+);
+my @sgmlFileTypes = qw(.dtd .mod .sgml .sgm);
+my @htmlFileTypes = qw(.html .htm .jsp .ihtml);
+my @freemarkerFileTypes = qw(.ftl);
+my @cFileTypes = qw(
+  .java .js .c .h .cpp .cc .cs .css .egrm .grm
+  .javascript .jj .gy .g
+);
+my @shFileTypes = qw(.sh .ccf .pl .py .sed .awk);
+my @propertiesFileTypes = qw(
+  .properties .rnc .rnx .prefs .rb .handlers .schemas
+);
+my @dosFileTypes = qw(.bat .cmd);
+my @sqlFileTypes = qw(.script .sql);
+my @vmFileTypes = qw(.vm);
+# Extensions that are reported as "ignored" rather than as unknown.
+my @ignoreFileTypes = qw(.txt .dcl .ent .pen .project);
+
+my (%fileTypes, $fileType);
+
+# One row per comment style:
+#   [ type, openComment, leaderComment, closeComment, insertionPoint, extensions ]
+# insertionPoint "0" inserts the license at the very top of the file;
+# "1" inserts it after the first line (the xml declaration for xml files,
+# the #! invocation for sh-style files).
+# NOTE(review): Freemarker files are registered with type "html" but use
+# the "<#--" comment opener.
+# NOTE(review): the vm style also inserts after line 1 - confirm that this
+# is intended for Velocity templates.
+my @commentStyles = (
+  [ "xml",        "<!--\n",        " ",    "-->\n", "1", \@xmlFileTypes ],
+  [ "sgml",       "<!--\n",        " ",    "-->\n", "0", \@sgmlFileTypes ],
+  [ "html",       "<!--\n",        " ",    "-->\n", "0", \@htmlFileTypes ],
+  [ "html",       "<#--\n",        " ",    "-->\n", "0", \@freemarkerFileTypes ],
+  [ "C",          "/*\n",          "* ",   "*/\n",  "0", \@cFileTypes ],
+  [ "sh",         "\n",            "# ",   "\n",    "1", \@shFileTypes ],
+  [ "properties", "",              "# ",   "\n",    "0", \@propertiesFileTypes ],
+  [ "dos",        "\@echo off\n",  "rem ", "\n",    "0", \@dosFileTypes ],
+  [ "sql",        "",              "-- ",  "\n",    "0", \@sqlFileTypes ],
+  [ "vm",         "#*\n",          " ",    "*#\n",  "1", \@vmFileTypes ],
+);
+
+# Expand the table into %fileTypes, keyed by filename extension.
+foreach my $style (@commentStyles) {
+  my ($type, $open, $leader, $close, $insertAt, $extensions) = @$style;
+  foreach my $extension (@$extensions) {
+    $fileTypes{$extension} = {
+      type           => $type,
+      openComment    => $open,
+      leaderComment  => $leader,
+      closeComment   => $close,
+      insertionPoint => $insertAt,
+    };
+  }
+}
+
+# Running totals, updated by process_file and reported at the end.
+my ($countTotal, $countUnknownType, $countIgnoreType) = (0, 0, 0);
+my ($countXmlDeclMissing, $countInserted, $countAvoid) = (0, 0, 0);
+my ($countLicense, $countLicense10, $countLicense11, $countLicense12) = (0, 0, 0, 0);
+my ($countLicensePD, $countLicenseOther) = (0, 0);
+my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
+
+# 3rdParty users of an Apache License
+my ($countLicenseF20, $countLicenseF11, $countLicenseF12) = (0, 0, 0);
+
+my $dualLicensesDetected = 0;
+my %uniqueSuffixes;   # extension => number of files seen with it
+my @avoidList;        # patterns; a file whose path matches one is skipped
+
+# read the avoidList: one pattern per line, lines starting with '#' are
+# comments
+if (defined($avoidList)) {
+  open(my $avoidFh, "<", $avoidList)
+    or die "Could not open input file '$avoidList': $!";
+  while (my $entry = <$avoidFh>) {
+    chomp $entry;
+    next if ($entry =~ /^#/);
+    # A blank line would become an empty pattern, and an empty pattern
+    # makes the match operator reuse the last successful regex instead of
+    # matching everything/nothing - so never store one.
+    next if ($entry =~ /^\s*$/);
+    push(@avoidList, $entry);
+  }
+  close($avoidFh);
+}
+
+#--------------------------------------------------
+# Build the license header for files with extension $ext: the opening
+# comment marker, every license line prefixed with the comment leader
+# (trailing whitespace removed), and the closing comment marker.
+# Returns the header as a list of output chunks.
+sub _license_header_for {
+  my ($ext) = @_;
+  my $style = $fileTypes{$ext};
+  my @header = ($style->{openComment});
+  foreach my $text (@license) {
+    my $commented = $style->{leaderComment} . $text;
+    $commented =~ s/\s+$//;   # blank license lines must not end in a space
+    push(@header, $commented . "\n");
+  }
+  push(@header, $style->{closeComment});
+  return @header;
+}
+
+# File::Find callback, called once for every entry below the start
+# directory.  For each text file it
+#   - skips CVS/SVN housekeeping, hidden files and avoidList matches,
+#   - looks for an existing license in the first 1500 characters and
+#     updates the matching counters,
+#   - skips xml-type files that lack an XML declaration, files that
+#     already have a license and files of unknown/ignored type,
+#   - otherwise inserts the license header at the insertion point of the
+#     file type (unless -p was given, in which case nothing is written).
+# One log line per examined file is printed to STDOUT.
+# Dies if the file cannot be read, written or renamed.
+sub process_file {
+  return unless -f && -T; # process only text files
+  # File::Find has (by default) changed into the file's directory and put
+  # the bare name in $_; $File::Find::name is relative to the start
+  # directory and is therefore only usable for reporting, not for open().
+  my $localName = $_;
+  my $fileName = $File::Find::name;
+  my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
+  return if ($dir =~ /\/CVS\//); # skip CVS directories
+  return if ($dir =~ /\/\.svn\//); # skip SVN directories
+  return if ($fileName =~ /\.cvsignore/); # skip
+  return if ($file =~ /^\./); # skip hidden files
+  foreach my $avoidFn (@avoidList) {
+    # an empty pattern would silently reuse the last successful regex
+    next unless length $avoidFn;
+    if ($fileName =~ /$avoidFn/) {
+      $countAvoid++;
+      return;
+    }
+  }
+  $countTotal++;
+  if ($ext eq "") { $ext = "NoExtension"; }
+  $uniqueSuffixes{$ext}++;
+  print "$fileName, ";
+
+  # Slurp the whole file; $/ is only changed inside this block.
+  my $content;
+  {
+    open(my $in, "<", $localName)
+      or die "Could not open input file '$fileName': $!";
+    local $/;
+    $content = <$in>;
+    close($in) or die "Could not close input file '$fileName': $!";
+  }
+  $content = "" unless defined $content;
+
+  # First do some tests on the file to ensure it does not already have a license
+  # and ensure that XML files have an xml declaration.
+  my ($existsLicense, $warnDualLicense, $existsXmlDecl) = (0, 0, 0);
+  my ($warnAL20OldLicense) = 0;
+  my ($warnAL20aOldLicense) = 0;
+  my $licenseType = "";
+  # we want our matches to happen only in the top part of the file
+  # NOTE: You may want to relax this from time-to-time to find
+  # all possible dual-license issues.
+  my $headContent = substr($content, 0, 1500);
+  $headContent =~ s/[ \t]+/ /g;
+
+  # detect various existing licenses; the first matching case wins
+  LICENSE_CASE: {
+    if ($headContent =~ /Licensed to the Apache Software Foundation \(ASF\) under/) {
+      $existsLicense = 1; $countLicense++;
+      $countLicense20b++; $licenseType = "AL-20b";
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Licensed under the Apache License.*Version 2.0/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Apache Software Foundation or its licensors/) {
+        $countLicense20a++; $licenseType = "AL-20a";
+        $warnAL20aOldLicense = 1;
+      }
+      elsif ($headContent =~ /Copyright.*Apache Software Foundation/) {
+        $countLicense20++; $licenseType = "AL-20";
+        $warnAL20OldLicense = 1;
+      }
+      else {
+        $countLicenseF20++; $licenseType = "F-AL-20";
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /The Apache Software License.*Version 1.2/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Copyright.*Apache Software Foundation/) {
+        $countLicense12++; $licenseType = "AL-12";
+      }
+      else {
+        $countLicenseF12++; $licenseType = "F-AL-12";
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /The Apache Software License.*Version 1.1/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Copyright.*Apache Software Foundation/) {
+        $countLicense11++; $licenseType = "AL-11";
+      }
+      else {
+        $countLicenseF11++; $licenseType = "F-AL-11";
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Copyright.*The Apache Group/) {
+      $countLicense10++; $licenseType = "AL-10";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Public Domain.*/i) {
+      $countLicensePD++; $licenseType = "PublicDomain";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    # catchall
+    if ($headContent =~ /Copyright|\(c\)/i) {
+      # do process xml files that have a copyright attribute
+      last LICENSE_CASE if ($headContent =~ /copyright=/i);
+      # do process DTD files that have a copyright attribute
+      last LICENSE_CASE if ($headContent =~ /copyright CDATA/i);
+      # do process css files that have a .copyright section
+      last LICENSE_CASE if ($headContent =~ /\.copyright/i);
+      # do process files that just talk about copyright
+      last LICENSE_CASE if ($headContent =~ /copyright statement/i);
+      $countLicenseOther++; $licenseType = "Other";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    # catchall
+    if ($headContent =~ /re[ -]*distribut/i) {
+      $countLicenseOther++; $licenseType = "Other";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+  }
+
+  # Try to detect if a new AL-20 license has been accidentally inserted
+  # as well as having some other license.
+  # FIXME: If a practice run reveals more types of foreign copyright
+  # then add patterns here.
+  if ($licenseType =~ /AL-20/) {
+    if (($headContent =~ /Rights Reserved/i) ||
+        ($headContent =~ /Public Domain/i) ||
+        ($headContent =~ /Copyright.*Copyright/i)) {
+      $warnDualLicense = 1; $dualLicensesDetected++;
+    }
+  }
+
+  # ensure that xml files have an xml declaration
+  if ($headContent =~ /^<\?xml/) { $existsXmlDecl = 1; }
+
+  my $recognisedFileType = exists $fileTypes{$ext} ? 1 : 0;
+  my $thisFileType = $recognisedFileType ? $fileTypes{$ext}{type} : "unknown";
+  print "extension=$ext, fileType=$thisFileType, ";
+  if (!$existsXmlDecl && ($thisFileType eq "xml")) {
+    print "XML file does not have XML Declaration so skipping\n";
+    $countXmlDeclMissing++;
+    return;
+  }
+  if ($existsLicense) {
+    if ($licenseType !~ /^AL/) { print "WARN: "; }
+    print "Found existing license (licenseType=$licenseType) so skipping";
+    if ($warnAL20OldLicense) { print ", WARN: old AL-20 copyright notice"; }
+    if ($warnAL20aOldLicense) { print ", WARN: old AL-20a copyright notice"; }
+    if ($warnDualLicense) { print ", WARN: dual license"; }
+    print "\n";
+    return;
+  }
+  # Ignored types are counted here and then fall through to the
+  # "not recognised" branch; the final report subtracts them again.
+  foreach my $ignoredType (@ignoreFileTypes) {
+    if ($ignoredType eq $ext) {
+      $countIgnoreType++;
+      print "ignored, ";
+    }
+  }
+  if (!$recognisedFileType) {
+    print "File type '$ext' is not recognised so skipping\n";
+    $countUnknownType++;
+    return;
+  }
+
+  # Now process the file.
+  my @lines = split(/^/, $content);   # keeps the line terminators
+  my $insertAt = $fileTypes{$ext}{insertionPoint};
+  if (scalar(@lines) < $insertAt) {
+    # e.g. an empty script: there is no first line to insert after
+    print "File is too short to hold the insertion point so skipping\n";
+    return;
+  }
+  $countInserted++;
+  print "Insert new license\n";
+  return if ($opt_p);
+
+  splice(@lines, $insertAt, 0, _license_header_for($ext));
+
+  # Write to a temporary file next to the original and then replace the
+  # original, keeping its permission bits (e.g. the exec bit of scripts).
+  my $tmpFile = $localName . ".tmp";
+  open(my $out, ">", $tmpFile)
+    or die "Could not open output file '$fileName.tmp': $!";
+  print {$out} @lines
+    or die "Could not write output file '$fileName.tmp': $!";
+  close($out) or die "Could not close output file '$fileName.tmp': $!";
+  my $mode = (stat($localName))[2];
+  chmod($mode & 07777, $tmpFile) if (defined $mode);
+  rename($tmpFile, $localName)
+    or die "Could not rename '$fileName.tmp' to '$fileName': $!";
+}
+# Walk the tree below the start directory, handing every entry to
+# process_file.
+find(\&process_file, $startDir);
+
+#--------------------------------------------------
+# Report some statistics
+my $statsMsg = $opt_p ? "would be" : "were";
+# ignored types were also counted as unknown, so take them out again
+$countUnknownType -= $countIgnoreType;
+print STDERR qq!
+Total $countTotal text files were investigated.
+New licenses $statsMsg inserted in $countInserted files.
+Skipped $countLicense files with an existing license:
+ (Apache v2.0=$countLicense20, v2.0a=$countLicense20a, v2.0b=$countLicense20b)
+ (Apache v1.2=$countLicense12, v1.1=$countLicense11, v1.0=$countLicense10)
+ (Other=$countLicenseOther, PublicDomain=$countLicensePD)
+ (3rdParty using AL v2.0=$countLicenseF20, v1.2=$countLicenseF12, v1.1=$countLicenseF11)
+Skipped $countXmlDeclMissing XML files with missing XML Declaration.
+!;
+print STDERR "Avoided $countAvoid files as specified in the avoidList\n"
+  if (defined($avoidList));
+print STDERR qq!
+Ignored $countIgnoreType files of specified type (@ignoreFileTypes)
+Skipped $countUnknownType files of unknown type.
+!;
+if ($dualLicensesDetected) {
+  print STDERR qq!
+WARNING: $dualLicensesDetected files had another license as well as the new
+Apache v2.0 license. (Scan the log output for lines with "WARN: dual".)
+!;
+}
+
+my @seenSuffixes = sort keys %uniqueSuffixes;
+if ($countUnknownType > 0) {
+  print STDERR qq!
+List of unknown filename extensions and ignored filename extensions:
+(Add new fileTypes to this script if you want them to be catered for.)
+!;
+  # every extension that was seen but has no entry in %fileTypes
+  foreach my $seen (@seenSuffixes) {
+    next if (exists $fileTypes{$seen});
+    print STDERR "$seen=$uniqueSuffixes{$seen} ";
+  }
+  print STDERR "\n\n";
+}
+print STDERR "List of all unique filename extensions:\n";
+foreach my $seen (@seenSuffixes) {
+  print STDERR "$seen=$uniqueSuffixes{$seen} ";
+}
+print STDERR "\n\n";
+print STDERR "Finished practice run.\n" if ($opt_p);
+
+#==================================================
+# ShowUsage
+#==================================================
+
+# Print the command-line synopsis to STDERR.  $0 (the name the script was
+# invoked as) is interpolated into the text.
+sub ShowUsage {
+  print STDERR <<"END_OF_USAGE";
+
+Usage: $0 [-h] [-p] startDir [avoidList] > logfile
+
+ where:
+ startDir = The SVN directory (pathname) to start processing. Will descend.
+ avoidList = List of files and directories to avoid, one per line.
+
+ option:
+ h = Show this help message.
+ p = Do a practice run. Do not write any files.
+
+END_OF_USAGE
+}
Propchange: incubator/stanbol/trunk/conventions/insert_license.pl
------------------------------------------------------------------------------
svn:executable = *