You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@netbeans.apache.org by jl...@apache.org on 2017/09/21 18:08:50 UTC
[05/12] incubator-netbeans-tools git commit: A first attempt to
create a tool to categorize license headers.
A first attempt to create a tool to categorize license headers.
Project: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/commit/aaa4ac02
Tree: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/tree/aaa4ac02
Diff: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/diff/aaa4ac02
Branch: refs/heads/master
Commit: aaa4ac02885268d59de82d7a21bef0f41e3e26c6
Parents: 3cdd16d
Author: Jan Lahoda <jl...@netbeans.org>
Authored: Sat Sep 9 10:38:48 2017 +0200
Committer: Jan Lahoda <jl...@netbeans.org>
Committed: Sat Sep 9 10:38:48 2017 +0200
----------------------------------------------------------------------
.gitignore | 4 +-
convert/nbproject/project.properties | 6 +-
convert/src/convert/CategorizeLicenses.java | 174 +++++++++++++++++++++++
3 files changed, 181 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 34f5785..f4a9236 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-/convert/nbproject/private/
\ No newline at end of file
+/convert/nbproject/private/
+/convert/build/
+/convert/dist/
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/nbproject/project.properties
----------------------------------------------------------------------
diff --git a/convert/nbproject/project.properties b/convert/nbproject/project.properties
index 0227ea7..4b111c8 100644
--- a/convert/nbproject/project.properties
+++ b/convert/nbproject/project.properties
@@ -1,9 +1,10 @@
annotation.processing.enabled=true
annotation.processing.enabled.in.editor=false
-annotation.processing.processor.options=
annotation.processing.processors.list=
annotation.processing.run.all.processors=true
annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=convert
+application.vendor=lahvac
build.classes.dir=${build.dir}/classes
build.classes.excludes=**/*.java,**/*.form
# This directory is removed when the project is cleaned:
@@ -32,6 +33,7 @@ dist.jar=${dist.dir}/convert.jar
dist.javadoc.dir=${dist.dir}/javadoc
dist.jlink.dir=${dist.dir}/jlink
dist.jlink.output=${dist.jlink.dir}/convert
+endorsed.classpath=
excludes=
includes=**
jar.compress=false
@@ -71,7 +73,7 @@ jlink.additionalmodules=
jlink.additionalparam=
jlink.launcher=true
jlink.launcher.name=convert
-main.class=
+main.class=convert.CategorizeLicenses
manifest.file=manifest.mf
meta.inf.dir=${src.dir}/META-INF
mkdist.disabled=false
http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/src/convert/CategorizeLicenses.java
----------------------------------------------------------------------
diff --git a/convert/src/convert/CategorizeLicenses.java b/convert/src/convert/CategorizeLicenses.java
new file mode 100644
index 0000000..ee7d5c1
--- /dev/null
+++ b/convert/src/convert/CategorizeLicenses.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package convert;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
/**
 * Command-line tool that walks a source tree and groups the files it finds by
 * the text of their CDDL license header.
 *
 * <p>Every regular file below the source directory falls into one of three buckets:
 * <ul>
 *   <li>it does not contain the string {@code CDDL} at all;</li>
 *   <li>it contains {@code CDDL}, but no license comment could be extracted
 *       (unknown file extension, or the extracted comment does not mention
 *       {@code CDDL});</li>
 *   <li>a license comment mentioning {@code CDDL} was extracted; the file is
 *       then grouped by the normalized license text and by each of its lines
 *       ("paragraphs") after normalization.</li>
 * </ul>
 */
public class CategorizeLicenses {

    /**
     * A four-digit year starting with 10, 11, 19, 20 or 21, optionally followed by
     * {@code -}, {@code ,} or {@code /} and a second such year (e.g. {@code 1997-2010}).
     */
    private static final Pattern YEARS_PATTERN = Pattern.compile("[12][019][0-9][0-9]([ \t]*[-,/][ \t]*[12][019][0-9][0-9])?");

    /**
     * A line break that has a non-newline character on both sides, i.e. a break
     * inside a paragraph. Lookarounds are used so that the neighbouring characters
     * are not consumed; otherwise consecutive one-character lines would only be
     * partially joined.
     */
    private static final Pattern INNER_LINE_BREAK = Pattern.compile("(?<=[^\n])\n(?=[^\n])");

    /** A run of spaces and/or tabs. */
    private static final Pattern BLANK_RUN = Pattern.compile("[ \t]+");

    private static final Pattern BLOCK_COMMENT_START = Pattern.compile("/\\*+");
    private static final Pattern BLOCK_COMMENT_END = Pattern.compile("\\*+/");
    private static final Pattern BLOCK_COMMENT_LINE_PREFIX = Pattern.compile("^[ \t]*\\**[ \t]*");

    private static final Pattern MARKUP_COMMENT_START = Pattern.compile("<!--+");
    private static final Pattern MARKUP_COMMENT_END = Pattern.compile("-+->");
    private static final Pattern MARKUP_COMMENT_LINE_PREFIX = Pattern.compile("^[ \t]*");

    private static final Pattern SHEBANG_LINE = Pattern.compile("#!.*");

    /**
     * Entry point.
     *
     * <p>Writes the following files into the target directory (which is created
     * if it does not exist): {@code lic<N>} (one per distinct normalized license),
     * {@code par<N>} (one per distinct paragraph),
     * {@code have-cddl-not-recognized-filetype0} and {@code do-not-have-cddl0}.
     * Summary counts are printed to standard error.
     *
     * @param args exactly two elements: the source directory and the target directory
     * @throws IOException if the source tree cannot be walked or a report cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.err.println("Use: CategorizeLicenses <source-directory> <target-directory>");
            return;
        }

        Path root = Paths.get(args[0]);
        Path target = Paths.get(args[1]);

        Map<String, List<String>> licenses = new HashMap<>();
        Map<String, List<String>> paragraphs = new HashMap<>();
        Set<String> noCDDL = new HashSet<>();
        Set<String> cddlNotRecognized = new HashSet<>();

        // Files.find keeps directories open until the stream is closed.
        try (java.util.stream.Stream<Path> files =
                Files.find(root, Integer.MAX_VALUE, (p, attr) -> attr.isRegularFile())) {
            files.forEach(p -> {
                String path = root.relativize(p).toString();
                String code;
                try {
                    code = readSource(p);
                } catch (IOException ex) {
                    // Best effort: an unreadable file is reported and skipped,
                    // it must not abort the whole scan.
                    System.err.println("cannot read " + p + ": " + ex);
                    return;
                }

                if (!code.contains("CDDL")) {
                    noCDDL.add(path);
                    return;
                }

                String lic = snipLicense(code, p);
                if (lic == null || !lic.contains("CDDL")) {
                    cddlNotRecognized.add(path);
                    return;
                }

                lic = normalizeLicense(lic);
                licenses.computeIfAbsent(lic, l -> new ArrayList<>()).add(path);
                for (String par : lic.split("\n")) {
                    paragraphs.computeIfAbsent(par, l -> new ArrayList<>()).add(path);
                }
            });
        }

        System.err.println("licenses count: " + licenses.size());
        System.err.println("paragraphs count: " + paragraphs.size());

        System.err.println("cddl, unrecognized file: " + cddlNotRecognized.size());
        System.err.println("no cddl license: " + noCDDL.size());

        Files.createDirectories(target);

        dump(licenses, target, "lic");
        dump(paragraphs, target, "par");
        dump(Collections.singletonMap("Files which contain string CDDL, but their comment structure is not (yet) recognized.", cddlNotRecognized), target, "have-cddl-not-recognized-filetype");
        dump(Collections.singletonMap("Files which do not contain string CDDL", noCDDL), target, "do-not-have-cddl");
    }

    /**
     * Reads a file as UTF-8 text and converts CRLF line endings to LF, so that the
     * result does not depend on the platform default charset and files that differ
     * only in line endings are treated alike.
     *
     * @param file the file to read
     * @return the file content with {@code \r\n} replaced by {@code \n}
     * @throws IOException if the file cannot be read
     */
    private static String readSource(Path file) throws IOException {
        String text = new String(Files.readAllBytes(file), java.nio.charset.StandardCharsets.UTF_8);
        return text.replace("\r\n", "\n");
    }

    /**
     * Normalizes an extracted license so that equivalent headers compare equal:
     * years become {@code <YEARS>}, line breaks inside a paragraph become a single
     * space, and runs of spaces/tabs collapse to one space. Blank lines (two or
     * more consecutive newlines) are kept.
     *
     * @param lic the raw license text
     * @return the normalized license text
     */
    private static String normalizeLicense(String lic) {
        String result = YEARS_PATTERN.matcher(lic).replaceAll(Matcher.quoteReplacement("<YEARS>"));
        result = INNER_LINE_BREAK.matcher(result).replaceAll(" ");
        return BLANK_RUN.matcher(result).replaceAll(" ");
    }

    /**
     * Writes one report file per map entry, named {@code name + index}, where the
     * index follows the iteration order of {@code cat}. Each file holds the entry
     * key, an empty line, the line {@code files:} and then the entry's values in
     * sorted order, one per line.
     *
     * @param cat    category text mapped to the relative paths belonging to it
     * @param target existing directory to write into
     * @param name   file name prefix
     * @throws IOException if a report file cannot be written
     */
    private static void dump(Map<String, ? extends Collection<String>> cat, Path target, String name) throws IOException {
        int index = 0;
        for (Map.Entry<String, ? extends Collection<String>> entry : cat.entrySet()) {
            List<String> sortedFiles = new ArrayList<>(entry.getValue());
            Collections.sort(sortedFiles);
            try (Writer w = Files.newBufferedWriter(target.resolve(name + index++))) {
                w.write(entry.getKey());
                w.write("\n\n");
                w.write("files:\n");
                for (String file : sortedFiles) {
                    w.write(file);
                    w.write("\n");
                }
            }
        }
    }

    /**
     * Extracts the license comment from a file, choosing the comment syntax from
     * the text after the last {@code '.'} of the file name.
     *
     * @param code the file content
     * @param file the file, used only for its name
     * @return the comment text, or {@code null} if the extension is not handled or
     *         no comment was found
     */
    private static String snipLicense(String code, Path file) {
        String fn = file.getFileName().toString();
        switch (fn.substring(fn.lastIndexOf('.') + 1)) {
            case "javx":
            case "c":
            case "h":
            case "cpp":
            case "java":
                return snipComment(code, BLOCK_COMMENT_START, BLOCK_COMMENT_END, BLOCK_COMMENT_LINE_PREFIX);
            case "html":
            case "xsd":
            case "xsl":
            case "dtd":
            case "settings":
            case "wstcgrp":
            case "wstcref":
            case "wsgrp":
            case "xml":
                return snipComment(code, MARKUP_COMMENT_START, MARKUP_COMMENT_END, MARKUP_COMMENT_LINE_PREFIX);
            case "sh":
                return snipLicenseBundle(code, SHEBANG_LINE);
            case "properties":
                return snipLicenseBundle(code, null);
            default:
                return null;
        }
    }

    /**
     * Returns the text between the first match of {@code commentStart} and the
     * first following match of {@code commentEnd}, with {@code linePrefix} removed
     * from the beginning of every line.
     *
     * @param code         the file content
     * @param commentStart pattern of the comment opener
     * @param commentEnd   pattern of the comment terminator
     * @param linePrefix   decoration to strip from the start of each line
     * @return the comment body, or {@code null} if no complete comment exists
     */
    private static String snipComment(String code, Pattern commentStart, Pattern commentEnd, Pattern linePrefix) {
        Matcher startM = commentStart.matcher(code);
        if (!startM.find()) {
            return null;
        }
        Matcher endM = commentEnd.matcher(code);
        if (!endM.find(startM.end())) {
            return null;
        }
        String body = code.substring(startM.end(), endM.start());
        return Arrays.stream(body.split("\n"))
                     .map(line -> linePrefix.matcher(line).replaceAll(""))
                     .collect(Collectors.joining("\n"));
    }

    /**
     * Collects the leading run of {@code #}-prefixed lines of a file, with the
     * {@code #} and surrounding whitespace removed from each line.
     *
     * @param code             the file content
     * @param firstLinePattern if not {@code null} and the (trimmed) first line
     *                         matches it entirely, that one line is skipped;
     *                         later lines are never skipped
     * @return the collected lines, each terminated by {@code \n}; empty if the
     *         file does not start with a {@code #} line
     */
    private static String snipLicenseBundle(String code, Pattern firstLinePattern) {
        String[] lines = code.split("\n");
        int start = 0;
        if (firstLinePattern != null && lines.length > 0
                && firstLinePattern.matcher(lines[0].trim()).matches()) {
            start = 1;
        }

        StringBuilder res = new StringBuilder();
        for (int i = start; i < lines.length; i++) {
            String line = lines[i].trim();
            if (!line.startsWith("#")) {
                break;
            }
            res.append(line.substring(1).trim()).append('\n');
        }
        return res.toString();
    }

}