You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@netbeans.apache.org by jl...@apache.org on 2017/09/21 18:08:50 UTC

[05/12] incubator-netbeans-tools git commit: A first attempt to create a tool to categorize license headers.

A first attempt to create a tool to categorize license headers.


Project: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/commit/aaa4ac02
Tree: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/tree/aaa4ac02
Diff: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/diff/aaa4ac02

Branch: refs/heads/master
Commit: aaa4ac02885268d59de82d7a21bef0f41e3e26c6
Parents: 3cdd16d
Author: Jan Lahoda <jl...@netbeans.org>
Authored: Sat Sep 9 10:38:48 2017 +0200
Committer: Jan Lahoda <jl...@netbeans.org>
Committed: Sat Sep 9 10:38:48 2017 +0200

----------------------------------------------------------------------
 .gitignore                                  |   4 +-
 convert/nbproject/project.properties        |   6 +-
 convert/src/convert/CategorizeLicenses.java | 174 +++++++++++++++++++++++
 3 files changed, 181 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 34f5785..f4a9236 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-/convert/nbproject/private/
\ No newline at end of file
+/convert/nbproject/private/
+/convert/build/
+/convert/dist/
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/nbproject/project.properties
----------------------------------------------------------------------
diff --git a/convert/nbproject/project.properties b/convert/nbproject/project.properties
index 0227ea7..4b111c8 100644
--- a/convert/nbproject/project.properties
+++ b/convert/nbproject/project.properties
@@ -1,9 +1,10 @@
 annotation.processing.enabled=true
 annotation.processing.enabled.in.editor=false
-annotation.processing.processor.options=
 annotation.processing.processors.list=
 annotation.processing.run.all.processors=true
 annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=convert
+application.vendor=lahvac
 build.classes.dir=${build.dir}/classes
 build.classes.excludes=**/*.java,**/*.form
 # This directory is removed when the project is cleaned:
@@ -32,6 +33,7 @@ dist.jar=${dist.dir}/convert.jar
 dist.javadoc.dir=${dist.dir}/javadoc
 dist.jlink.dir=${dist.dir}/jlink
 dist.jlink.output=${dist.jlink.dir}/convert
+endorsed.classpath=
 excludes=
 includes=**
 jar.compress=false
@@ -71,7 +73,7 @@ jlink.additionalmodules=
 jlink.additionalparam=
 jlink.launcher=true
 jlink.launcher.name=convert
-main.class=
+main.class=convert.CategorizeLicenses
 manifest.file=manifest.mf
 meta.inf.dir=${src.dir}/META-INF
 mkdist.disabled=false

http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/src/convert/CategorizeLicenses.java
----------------------------------------------------------------------
diff --git a/convert/src/convert/CategorizeLicenses.java b/convert/src/convert/CategorizeLicenses.java
new file mode 100644
index 0000000..ee7d5c1
--- /dev/null
+++ b/convert/src/convert/CategorizeLicenses.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package convert;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/** Groups the files of a source tree by their CDDL license header and writes one report file per group. */
+public class CategorizeLicenses {
+
+    /** A four-digit year, optionally followed by a second one after '-', ',' or '/'. */
+    private static final Pattern YEARS_PATTERN = Pattern.compile("[12][019][0-9][0-9]([ \t]*[-,/][ \t]*[12][019][0-9][0-9])?");
+    /** A lone line break inside a paragraph; the lookarounds let one-character lines be joined too. */
+    private static final Pattern SOFT_BREAK = Pattern.compile("(?<=[^\n])\n(?=[^\n])");
+
+    /**
+     * Scans {@code args[0]} recursively and writes the reports into {@code args[1]}, creating it if needed.
+     * @throws IOException if the source tree cannot be walked or a report file cannot be written
+     */
+    public static void main(String[] args) throws IOException {
+        if (args.length != 2) {
+            System.err.println("Use: CategorizeLicenses <source-directory> <target-directory>");
+            return ;
+        }
+        Path root = Paths.get(args[0]);
+        Map<String, List<String>> licenses = new HashMap<>();
+        Map<String, List<String>> paragraphs = new HashMap<>();
+        Set<String> noCDDL = new HashSet<>();
+        Set<String> cddlNotRecognized = new HashSet<>();
+        try (java.util.stream.Stream<Path> files = Files.find(root, Integer.MAX_VALUE, (p, attr) -> attr.isRegularFile())) {
+            files.forEach(p -> {
+                String path = root.relativize(p).toString();
+                try {
+                    // UTF-8 (not the platform default) and CRLF folded to LF, so equal headers give equal keys.
+                    String code = new String(Files.readAllBytes(p), java.nio.charset.StandardCharsets.UTF_8).replace("\r\n", "\n");
+                    if (!code.contains("CDDL")) {
+                        noCDDL.add(path);
+                        return ;
+                    }
+                    String lic = snipLicense(code, p);
+                    if (lic == null || !lic.contains("CDDL")) {
+                        cddlNotRecognized.add(path);
+                        return ;
+                    }
+                    lic = YEARS_PATTERN.matcher(lic).replaceAll(Matcher.quoteReplacement("<YEARS>"));
+                    lic = SOFT_BREAK.matcher(lic).replaceAll(" ");
+                    lic = lic.replaceAll("[ \t]+", " ");
+                    licenses.computeIfAbsent(lic, l -> new ArrayList<>()).add(path);
+                    for (String par : lic.split("\n")) {
+                        paragraphs.computeIfAbsent(par, l -> new ArrayList<>()).add(path);
+                    }
+                } catch (IOException ex) {
+                    // Best effort: name the unreadable file and carry on with the rest.
+                    System.err.println("cannot read: " + path);
+                    ex.printStackTrace();
+                }
+            });
+        }
+
+        Path target = Paths.get(args[1]);
+        if (!Files.isDirectory(target))
+            Files.createDirectories(target);
+        System.err.println("licenses count: " + licenses.size());
+        System.err.println("paragraphs count: " + paragraphs.size());
+        System.err.println("cddl, unrecognized file: " + cddlNotRecognized.size());
+        System.err.println("no cddl license: " + noCDDL.size());
+
+        dump(licenses, target, "lic");
+        dump(paragraphs, target, "par");
+        dump(Collections.singletonMap("Files which contain string CDDL, but their comment structure is not (yet) recognized.", cddlNotRecognized), target, "have-cddl-not-recognized-filetype");
+        dump(Collections.singletonMap("Files which do not contain string CDDL", noCDDL), target, "do-not-have-cddl");
+    }
+
+    /**
+     * Writes one file per entry of {@code cat}, named {@code name} plus a running index: the key,
+     * an empty line, the line {@code files:} and then the entry's paths in sorted order.
+     */
+    private static void dump(Map<String, ? extends Collection<String>> cat, Path target, String name) throws IOException {
+        int i = 0;
+        for (Map.Entry<String, ? extends Collection<String>> e : cat.entrySet()) {
+            try (Writer w = Files.newBufferedWriter(target.resolve(name + i++))) {
+                w.write(e.getKey());
+                w.write("\n\nfiles:\n");
+                for (String file : e.getValue().stream().sorted().collect(Collectors.toList())) {
+                    w.write(file + "\n");
+                }
+            }
+        }
+    }
+
+    /** Extracts the header by file extension; {@code null} if it is unsupported or no delimited comment is found. */
+    private static String snipLicense(String code, Path file) {
+        String fn = file.getFileName().toString();
+        switch (fn.substring(fn.lastIndexOf('.') + 1)) {
+            case "javx": case "c": case "h": case "cpp":
+            case "java": return snipLicense(code, "/\\*+", "\\*+/", "^[ \t]*\\**[ \t]*");
+            case "html": case "xsd": case "xsl": case "dtd":
+            case "settings": case "wstcgrp": case "wstcref":
+            case "wsgrp":
+            case "xml": return snipLicense(code, "<!--+", "-+->", "^[ \t]*");
+            case "sh": return snipLicenseBundle(code, "#!.*");
+            case "properties": return snipLicenseBundle(code, null);
+            default: return null;
+        }
+    }
+
+    /**
+     * Returns the text between the first {@code commentStart} match and the next {@code commentEnd} match, with
+     * every match of the regex {@code normalizeLines} (may be null) removed per line; {@code null} if unmatched.
+     */
+    private static String snipLicense(String code, String commentStart, String commentEnd, String normalizeLines) {
+        Matcher startM = Pattern.compile(commentStart).matcher(code);
+        if (!startM.find())
+            return null;
+        Matcher endM = Pattern.compile(commentEnd).matcher(code);
+        if (!endM.find(startM.end()))
+            return null;
+        String lic = code.substring(startM.end(), endM.start());
+        if (normalizeLines == null)
+            return lic;
+        Pattern decoration = Pattern.compile(normalizeLines);
+        return Arrays.stream(lic.split("\n"))
+                     .map(l -> decoration.matcher(l).replaceAll(""))
+                     .collect(Collectors.joining("\n"));
+    }
+
+    /**
+     * Collects the leading run of {@code #} comment lines with the {@code #} and surrounding blanks stripped.
+     * Only the very first line is skipped when it fully matches {@code firstLinePattern} (may be null).
+     */
+    private static String snipLicenseBundle(String code, String firstLinePattern) {
+        String[] lines = code.split("\n");
+        int start = lines.length > 0 && firstLinePattern != null && lines[0].trim().matches(firstLinePattern) ? 1 : 0;
+        StringBuilder res = new StringBuilder();
+        for (int i = start; i < lines.length; i++) {
+            String line = lines[i].trim();
+            if (!line.startsWith("#"))
+                break;
+            res.append(line.substring(1).trim()).append("\n");
+        }
+        return res.toString();
+    }
+}