You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by fc...@apache.org on 2011/07/13 14:41:29 UTC

svn commit: r1145980 - /incubator/stanbol/trunk/conventions/insert_license.pl

Author: fchrist
Date: Wed Jul 13 12:41:29 2011
New Revision: 1145980

URL: http://svn.apache.org/viewvc?rev=1145980&view=rev
Log:
STANBOL-271 added modified insert_license.pl script. This version has support for Freemarker *.ftl files

Added:
    incubator/stanbol/trunk/conventions/insert_license.pl   (with props)

Added: incubator/stanbol/trunk/conventions/insert_license.pl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/conventions/insert_license.pl?rev=1145980&view=auto
==============================================================================
--- incubator/stanbol/trunk/conventions/insert_license.pl (added)
+++ incubator/stanbol/trunk/conventions/insert_license.pl Wed Jul 13 12:41:29 2011
@@ -0,0 +1,541 @@
+#!/usr/bin/perl -w
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#=========================================================
+#
+# For files that do not yet have an Apache License, insert the 2.0 license.
+# Adds comment markers for the relevant file type.
+#
+# This can also be used to provide a summary of the current situation.
+# It will detect the presence of various different license headers.
+# Use the -p option for practice mode.
+#
+# Limitations:
+# - Only developed and tested for certain file types. Others will be
+# reported and skipped.
+# Needs tweaks for other types (see "configuration" section below).
+# - Only inserts missing licenses and detects and reports other license types.
+#  See ./update-AL20.pl to update to the current license style.
+#
+# Caveats:
+# - As usual, make a backup of your tree first or be prepared to 'svn revert -R'
+# your working copy if the script stuffs up.
+#
+# WARNING: Be sure to look at the output of this script for warnings.
+# WARNING: Be sure to do the normal 'svn diff' and review.
+# Attend to the warning in tools/copy2license.pl about "collective copyright".
+#
+# Developed only for UNIX, YMMV.
+#
+# Procedure:
+# See ./relicense.txt for an example procedure.
+# Use -p for practise mode.
+# Run the script. It will descend the directory tree.
+# Run with no parameters or -h to show usage.
+#
+#=========================================================
+
+use strict;
+use vars qw($opt_h $opt_p);
+use Getopt::Std;
+use File::Basename;
+use File::Find;
+
+#--------------------------------------------------
+# ensure proper usage
+getopts("hp");
+if ((scalar @ARGV < 1) || defined($opt_h)) {
+  ShowUsage();
+  exit;
+}
+
+my $startDir = shift;
+my $avoidList = shift;
+if (!-e $startDir) {
+  print STDERR qq!
+The start directory '$startDir' does not exist.
+!;
+  ShowUsage();
+  exit;
+}
+if (defined($avoidList) && !-e $avoidList) {
+  print STDERR qq!
+The list of files to avoid '$avoidList' does not exist.
+!;
+  ShowUsage();
+  exit;
+}
+if ($opt_p) { print STDERR "\nDoing practice run. No files will be written\n"; }
+print qq!
+AL-20 = Apache License 2.0 with original Copyright line.
+AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
+AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
+----------------------
+
+!;
+
+#--------------------------------------------------
+# do some configuration
+my $license = qq!Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+!;
+my @license = split(/\n/, $license);
+
+# build a hash of filename extensions to be processed
+# together with the particular style of comment marker to use.
+my @xmlFileTypes = (
+  ".xml", ".xsl", ".xslt", ".xmap", ".xcat",
+  ".xconf", ".xroles", ".roles", ".xsp", ".rss",
+  ".xinfo", ".xprofile", ".xsamples", ".xtest", ".xweb", ".xwelcome",
+  ".samplesxconf", ".samplesxpipe", ".svg", ".xhtml", ".xhtml2", ".gt", ".jx", ".jmx",
+  ".jdo", ".orm", ".jdoquery", ".jelly",
+  ".jxt", ".meta", ".pagesheet", ".stx", ".xegrm", ".xgrm", ".xlex", ".xmi",
+  ".xsd", ".rng", ".rdf", ".rdfs", ".xul", ".tld", ".xxe", ".ft", ".fv",
+  ".wsdd", ".wsdl", ".xlog", ".pom",
+);
+my @sgmlFileTypes = (
+  ".dtd", ".mod", ".sgml", ".sgm",
+);
+my @htmlFileTypes = (
+  ".html", ".htm", ".jsp", ".ihtml",
+);
+my @freemarkerFileTypes = (
+  ".ftl",
+);
+my @cFileTypes = (
+  ".java", ".js", ".c", ".h", ".cpp", ".cc", ".cs", ".css", ".egrm", ".grm",
+  ".javascript", ".jj", ".gy", ".g",
+);
+my @shFileTypes = (
+  ".sh", ".ccf", ".pl", ".py", ".sed", ".awk",
+);
+my @propertiesFileTypes = (
+  ".properties", ".rnc", ".rnx", ".prefs", ".rb", ".handlers", ".schemas",
+);
+my @dosFileTypes = (
+  ".bat", ".cmd",
+);
+my @sqlFileTypes = (
+  ".script", ".sql",
+);
+my @vmFileTypes = (
+  ".vm",
+);
+my @ignoreFileTypes = (
+  ".txt", ".dcl", ".ent", ".pen", ".project"
+);
+my (%fileTypes, $fileType);
+foreach $fileType (@xmlFileTypes) {
+  $fileTypes{$fileType}{type} = "xml";
+  $fileTypes{$fileType}{openComment} = "<!--\n";
+  $fileTypes{$fileType}{leaderComment} = "  ";
+  $fileTypes{$fileType}{closeComment} = "-->\n";
+  # insert after line 1 which must be the xml declaration
+  $fileTypes{$fileType}{insertionPoint} = "1";
+}
+foreach $fileType (@sgmlFileTypes) {
+  $fileTypes{$fileType}{type} = "sgml";
+  $fileTypes{$fileType}{openComment} = "<!--\n";
+  $fileTypes{$fileType}{leaderComment} = "  ";
+  $fileTypes{$fileType}{closeComment} = "-->\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@htmlFileTypes) {
+  $fileTypes{$fileType}{type} = "html";
+  $fileTypes{$fileType}{openComment} = "<!--\n";
+  $fileTypes{$fileType}{leaderComment} = "  ";
+  $fileTypes{$fileType}{closeComment} = "-->\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@freemarkerFileTypes) {
+  $fileTypes{$fileType}{type} = "html";
+  $fileTypes{$fileType}{openComment} = "<#--\n";
+  $fileTypes{$fileType}{leaderComment} = "  ";
+  $fileTypes{$fileType}{closeComment} = "-->\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@cFileTypes) {
+  $fileTypes{$fileType}{type} = "C";
+  $fileTypes{$fileType}{openComment} = "/*\n";
+  $fileTypes{$fileType}{leaderComment} = "* ";
+  $fileTypes{$fileType}{closeComment} = "*/\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@shFileTypes) {
+  $fileTypes{$fileType}{type} = "sh";
+  $fileTypes{$fileType}{openComment} = "\n";
+  $fileTypes{$fileType}{leaderComment} = "# ";
+  $fileTypes{$fileType}{closeComment} = "\n";
+  # insert after line 1 which must be #! script invocation
+  $fileTypes{$fileType}{insertionPoint} = "1";
+}
+foreach $fileType (@propertiesFileTypes) {
+  $fileTypes{$fileType}{type} = "properties";
+  $fileTypes{$fileType}{openComment} = "";
+  $fileTypes{$fileType}{leaderComment} = "# ";
+  $fileTypes{$fileType}{closeComment} = "\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@dosFileTypes) {
+  $fileTypes{$fileType}{type} = "dos";
+  $fileTypes{$fileType}{openComment} = "\@echo off\n";
+  $fileTypes{$fileType}{leaderComment} = "rem ";
+  $fileTypes{$fileType}{closeComment} = "\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@sqlFileTypes) {
+  $fileTypes{$fileType}{type} = "sql";
+  $fileTypes{$fileType}{openComment} = "";
+  $fileTypes{$fileType}{leaderComment} = "-- ";
+  $fileTypes{$fileType}{closeComment} = "\n";
+  # insert at very top of file
+  $fileTypes{$fileType}{insertionPoint} = "0";
+}
+foreach $fileType (@vmFileTypes) {
+  $fileTypes{$fileType}{type} = "vm";
+  $fileTypes{$fileType}{openComment} = "#*\n";
+  $fileTypes{$fileType}{leaderComment} = "  ";
+  $fileTypes{$fileType}{closeComment} = "*#\n";
+  # insert after line 1 which must be the xml declaration
+  $fileTypes{$fileType}{insertionPoint} = "1";
+}
+
+my ($countTotal, $countUnknownType, $countIgnoreType) = (0, 0, 0);
+my ($countXmlDeclMissing, $countInserted, $countAvoid) = (0, 0, 0);
+my ($countLicense, $countLicense10, $countLicense11, $countLicense12) = (0, 0, 0, 0);
+my ($countLicensePD, $countLicenseOther) = (0, 0);
+my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
+
+# 3rdParty users of an Apache License
+my ($countLicenseF20, $countLicenseF11, $countLicenseF12) = (0, 0, 0);
+
+my $dualLicensesDetected = 0;
+my %uniqueSuffixes;
+my @avoidList;
+
+# read the avoidList
+if (defined($avoidList)) {
+  open(INPUT, "<$avoidList") or die "Could not open input file '$avoidList': $!";
+  while (<INPUT>) {
+    next if (/^#/);
+    chomp;
+    push(@avoidList, $_);
+  }
+  close INPUT;
+}
+
+#--------------------------------------------------
+sub process_file {
+  return unless -f && -T; # process only text files
+  my $fileName = $File::Find::name;
+  my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
+  return if ($dir =~ /\/CVS\//); # skip CVS directories
+  return if ($dir =~ /\/\.svn\//); # skip SVN directories
+  return if ($fileName =~ /.cvsignore/); # skip 
+  return if ($file =~ /^\./); # skip hidden files
+  foreach my $avoidFn (@avoidList) {
+    if ($fileName =~ /$avoidFn/) {
+      $countAvoid++;
+      return;
+    }
+  }
+  $countTotal++;
+  if ($ext eq "") { $ext = "NoExtension"; }
+  $uniqueSuffixes{$ext}++;
+  print "$fileName, ";
+  my $tmpFile = $fileName . ".tmp";
+  open(INPUT, "<$fileName") or die "Could not open input file '$fileName': $!";
+
+  # First do some tests on the file to ensure it does not already have a license
+  # and ensure that XML files have an xml declaration.
+  my ($existsLicense, $warnDualLicense, $existsXmlDecl) = (0, 0, 0);
+  my ($warnAL20OldLicense) = 0;
+  my ($warnAL20aOldLicense) = 0;
+  my $licenseType = "";
+  undef $/;  # slurp the whole file
+  my $content = <INPUT>;
+  # we want our matches to happen only in the top part of the file
+# NOTE: You may want to relax this from time-to-time to find
+# all possible dual-license issues.
+  my $headContent = substr($content, 0, 1500);
+  $headContent =~ s/[ \t]+/ /g;
+
+  # detect various existing licenses
+  LICENSE_CASE: {
+    if ($headContent =~ /Licensed to the Apache Software Foundation \(ASF\) under/) {
+      $existsLicense = 1; $countLicense++;
+      $countLicense20b++; $licenseType = "AL-20b";
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Licensed under the Apache License.*Version 2.0/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Apache Software Foundation or its licensors/) {
+        $countLicense20a++; $licenseType = "AL-20a";
+        $warnAL20aOldLicense = 1;
+      }
+      else {
+        if ($headContent =~ /Copyright.*Apache Software Foundation/) {
+          $countLicense20++; $licenseType = "AL-20";
+          $warnAL20OldLicense = 1;
+        }
+        else {
+          $countLicenseF20++; $licenseType = "F-AL-20";
+        }
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /The Apache Software License.*Version 1.2/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Copyright.*Apache Software Foundation/) {
+        $countLicense12++; $licenseType = "AL-12";
+      }
+      else {
+        $countLicenseF12++; $licenseType = "F-AL-12";
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /The Apache Software License.*Version 1.1/) {
+      $existsLicense = 1; $countLicense++;
+      if ($headContent =~ /Copyright.*Apache Software Foundation/) {
+        $countLicense11++; $licenseType = "AL-11";
+      }
+      else {
+        $countLicenseF11++; $licenseType = "F-AL-11";
+      }
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Copyright.*The Apache Group/) {
+      $countLicense10++; $licenseType = "AL-10";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    if ($headContent =~ /Public Domain.*/i) {
+      $countLicensePD++; $licenseType = "PublicDomain";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    # catchall
+    if ($headContent =~ /Copyright|\(c\)/i) {
+      # do process xml files that have a copyright attribute
+      last LICENSE_CASE if ($headContent =~ /copyright=/i);
+      # do process DTD files that have a copyright attribute
+      last LICENSE_CASE if ($headContent =~ /copyright CDATA/i);
+      # do process css files that have a .copyright section
+      last LICENSE_CASE if ($headContent =~ /\.copyright/i);
+      # do process files that just talk about copyright
+      last LICENSE_CASE if ($headContent =~ /copyright statement/i);
+      $countLicenseOther++; $licenseType = "Other";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+    # catchall
+    if ($headContent =~ /re[ -]*distribut/i) {
+      $countLicenseOther++; $licenseType = "Other";
+      $existsLicense = 1; $countLicense++;
+      last LICENSE_CASE;
+    }
+  }
+
+  # Try to detect if a new AL-20 license has been accidently inserted
+  # as well as having some other license.
+  # FIXME: If a practice run reveals more types of Foregin copyright
+  # then add patterns here.
+  if ($licenseType =~ /AL-20/) {
+    if (($headContent =~ /Rights Reserved/i) ||
+        ($headContent =~ /Public Domain/i) ||
+        ($headContent =~ /Copyright.*Copyright/i)) {
+      $warnDualLicense = 1; $dualLicensesDetected++;
+    }
+  }
+
+  # ensure that xml files have an xml declaration
+  if ($headContent =~ /^<\?xml/) { $existsXmlDecl = 1; }
+
+  $/ = "\n"; # reset input record separator
+
+  my $recognisedFileType = 0; my $thisFileType = "unknown";
+  foreach $fileType (keys %fileTypes) {
+    if ($fileType eq $ext) {
+      $recognisedFileType = 1;
+      $thisFileType = $fileTypes{$fileType}{type};
+      last;
+    }
+  }
+  print "extension=$ext, fileType=$thisFileType, ";
+  if (!$existsXmlDecl && ($thisFileType eq "xml")) {
+    print "XML file does not have XML Declaration so skipping\n";
+    $countXmlDeclMissing++;
+    return;
+  }
+  if ($existsLicense) {
+    if ($licenseType !~ /^AL/) { print "WARN: "; }
+    print "Found existing license (licenseType=$licenseType) so skipping";
+    if ($warnAL20OldLicense) { print ", WARN: old AL-20 copyright notice"; }
+    if ($warnAL20aOldLicense) { print ", WARN: old AL-20a copyright notice"; }
+    if ($warnDualLicense) { print ", WARN: dual license"; }
+    print "\n";
+    return;
+  }
+  foreach $fileType (@ignoreFileTypes) {
+    if ($fileType eq $ext) {
+      $countIgnoreType++;
+      print "ignored, ";
+    }
+  }
+  if (!$recognisedFileType) {
+    print "File type '$ext' is not recognised so skipping\n";
+    $countUnknownType++;
+    return;
+  }
+
+  # Now process the file.
+  my $insertionDone = 0; my ($line, $thisLine);
+  if (!$opt_p) {
+    open(OUTPUT, ">$tmpFile")
+      or die "Could not open output file '$tmpFile': $!";
+  }
+  $countInserted++;
+  if ($fileTypes{$ext}{insertionPoint} == 0) {
+    print "Insert new license\n";
+    if (!$opt_p) {
+      print OUTPUT $fileTypes{$ext}{openComment};
+      foreach $line (@license) {
+        $thisLine = $fileTypes{$ext}{leaderComment} . $line;
+        $thisLine =~ s/\s+$//;
+        print OUTPUT $thisLine, "\n";
+      }
+      print OUTPUT $fileTypes{$ext}{closeComment};
+    }
+    $insertionDone = 1;
+  }
+  seek(INPUT, 0, 0); $. = 0; # rewind to top of file
+  while (<INPUT>) {
+    if (!$opt_p) {
+      print OUTPUT $_ or die "Could not write output file '$fileName': $!";
+    }
+    if (!$insertionDone) {
+      if ($. == $fileTypes{$ext}{insertionPoint}) {
+        print "Insert new license\n";
+        if (!$opt_p) {
+          print OUTPUT $fileTypes{$ext}{openComment};
+          foreach $line (@license) {
+            $thisLine = $fileTypes{$ext}{leaderComment} . $line;
+            $thisLine =~ s/\s+$//;
+            print OUTPUT $thisLine, "\n";
+          }
+          print OUTPUT $fileTypes{$ext}{closeComment};
+        }
+        $insertionDone = 1;
+      }
+    }
+  }
+  close INPUT or die "Could not close input file '$fileName': $!";
+  if (!$opt_p) {
+    close OUTPUT or die "Could not close output file '$tmpFile': $!";
+    rename($tmpFile, $fileName);
+  }
+}
+find(\&process_file, $startDir);
+
+#--------------------------------------------------
+# Report some statistics
+my $statsMsg = "were";
+if ($opt_p) { $statsMsg = "would be"; }
+$countUnknownType -= $countIgnoreType;
+print STDERR qq!
+Total $countTotal text files were investigated.
+New licenses $statsMsg inserted in $countInserted files.
+Skipped $countLicense files with an existing license:
+ (Apache v2.0=$countLicense20, v2.0a=$countLicense20a, v2.0b=$countLicense20b)
+ (Apache v1.2=$countLicense12, v1.1=$countLicense11, v1.0=$countLicense10)
+ (Other=$countLicenseOther, PublicDomain=$countLicensePD)
+ (3rdParty using AL v2.0=$countLicenseF20, v1.2=$countLicenseF12, v1.1=$countLicenseF11)
+Skipped $countXmlDeclMissing XML files with missing XML Declaration.
+!;
+if (defined($avoidList)) {
+  print STDERR "Avoided $countAvoid files as specified in the avoidList\n";
+}
+print STDERR qq!
+Ignored $countIgnoreType files of specified type (@ignoreFileTypes)
+Skipped $countUnknownType files of unknown type.
+!;
+if ($dualLicensesDetected) {
+  print STDERR qq!
+WARNING: $dualLicensesDetected files had another license as well as the new
+Apache v2.0 license. (Scan the log output for lines with "WARN: dual".)
+!;
+}
+my $suffix;
+if ($countUnknownType > 0) {
+  print STDERR qq!
+List of unknown filename extensions and ignored filename extensions:
+(Add new fileTypes to this script if you want them to be catered for.)
+!;
+  foreach $suffix ( sort keys %uniqueSuffixes) {
+    my $suffixKnown = 0;
+    foreach $fileType (keys %fileTypes) {
+      if ($suffix eq $fileType) { $suffixKnown = 1; }
+    }
+    if (!$suffixKnown) {
+      print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
+    }
+  }
+  print STDERR "\n\n";
+}
+print STDERR "List of all unique filename extensions:\n";
+foreach $suffix ( sort keys %uniqueSuffixes) {
+  print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
+}
+print STDERR "\n\n";
+if ($opt_p) { print STDERR "Finished practice run.\n"; }
+
+#==================================================
+# ShowUsage
+#==================================================
+
+sub ShowUsage {
+  print STDERR qq!
+Usage: $0 [-h] [-p] startDir [avoidList] > logfile
+                                                                                
+  where:
+  startDir = The SVN directory (pathname) to start processing. Will descend.
+  avoidList = List of files and directories to avoid, one per line.
+
+  option:
+  h = Show this help message.
+  p = Do a practice run. Do not write any files.
+
+!;
+}

Propchange: incubator/stanbol/trunk/conventions/insert_license.pl
------------------------------------------------------------------------------
    svn:executable = *