You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@creadur.apache.org by po...@apache.org on 2015/05/21 23:47:15 UTC

svn commit: r1680956 - in /creadur/rat/trunk: RELEASE_NOTES.txt apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java

Author: pottlinger
Date: Thu May 21 21:47:15 2015
New Revision: 1680956

URL: http://svn.apache.org/r1680956
Log:
RAT-201: Add SWF to list of binary files.

* Refactored tests and made the MANIFEST.MF file name check case-insensitive (now uses equalsIgnoreCase).

Modified:
    creadur/rat/trunk/RELEASE_NOTES.txt
    creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
    creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java

Modified: creadur/rat/trunk/RELEASE_NOTES.txt
URL: http://svn.apache.org/viewvc/creadur/rat/trunk/RELEASE_NOTES.txt?rev=1680956&r1=1680955&r2=1680956&view=diff
==============================================================================
--- creadur/rat/trunk/RELEASE_NOTES.txt (original)
+++ creadur/rat/trunk/RELEASE_NOTES.txt Thu May 21 21:47:15 2015
@@ -49,6 +49,7 @@ Rat 0.12 (SNAPSHOT)
     * [RAT-172] - Exclude technical directories of source code management systems and their ignore files from RAT scans. 
                   Enabled for SVN,Git,Mercurial,Bazar and CVS.
     * [RAT-200] - Update to latest ASF parent pom v17.
+    * [RAT-201] - BinaryGuesser should treat *.swf as binary
 
 Rat 0.11
 ========

Modified: creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
URL: http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java?rev=1680956&r1=1680955&r2=1680956&view=diff
==============================================================================
--- creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java (original)
+++ creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java Thu May 21 21:47:15 2015
@@ -15,9 +15,11 @@
  * KIND, either express or implied.  See the License for the    *
  * specific language governing permissions and limitations      *
  * under the License.                                           *
- */ 
+ */
 package org.apache.rat.document.impl.guesser;
 
+import org.apache.rat.api.Document;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
@@ -29,8 +31,6 @@ import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Locale;
 
-import org.apache.rat.api.Document;
-
 /**
  * TODO: factor into MIME guesser and MIME->binary guesser
  */
@@ -41,54 +41,44 @@ public class BinaryGuesser {
     private static boolean isBinaryDocument(Document document) {
         boolean result = false;
         InputStream stream = null;
-        try
-        {
+        try {
             stream = document.inputStream();
             result = isBinary(stream);
-        }
-        catch (IOException e)
-        {
+        } catch (IOException e) {
             result = false;
-        }
-        finally
-        {
-            try
-            {
-                if (stream != null)
-                {
+        } finally {
+            try {
+                if (stream != null) {
                     stream.close();
                 }
-            }
-            catch (IOException e)
-            {
+            } catch (IOException e) {
                 // SWALLOW
-            }   
+            }
         }
         return result;
     }
-    
+
     private static boolean isBinary(CharSequence taste) {
         int highBytes = 0;
         final int length = taste.length();
         for (int i = 0; i < length; i++) {
             char c = taste.charAt(i);
             if (c > BinaryGuesser.NON_ASCII_THREASHOLD
-                || c <= BinaryGuesser.ASCII_CHAR_THREASHOLD) {
+                    || c <= BinaryGuesser.ASCII_CHAR_THREASHOLD) {
                 highBytes++;
             }
         }
         return highBytes * BinaryGuesser.HIGH_BYTES_RATIO
-            > length * BinaryGuesser.TOTAL_READ_RATIO;
+                > length * BinaryGuesser.TOTAL_READ_RATIO;
     }
 
     /**
      * @param in the file to check.
-     * 
      * @return Do the first few bytes of the stream hint at a binary file?
-     *
+     * <p/>
      * <p>Any IOException is swallowed internally and the test returns
      * false.</p>
-     *
+     * <p/>
      * <p>This method may lead to false negatives if the reader throws
      * an exception because it can't read characters according to the
      * reader's encoding from the underlying stream.</p>
@@ -108,12 +98,11 @@ public class BinaryGuesser {
 
     /**
      * @param in the file to check.
-     * 
      * @return Do the first few bytes of the stream hint at a binary file?
-     *
+     * <p/>
      * <p>Any IOException is swallowed internally and the test returns
      * false.</p>
-     *
+     * <p/>
      * <p>This method will try to read bytes from the stream and
      * translate them to characters according to the platform's
      * default encoding.  If any bytes can not be translated to
@@ -129,8 +118,8 @@ public class BinaryGuesser {
                 CharBuffer chars = CharBuffer.allocate(2 * bytesRead);
                 Charset cs = Charset.forName(System.getProperty("file.encoding"));
                 CharsetDecoder cd = cs.newDecoder()
-                    .onMalformedInput(CodingErrorAction.REPORT)
-                    .onUnmappableCharacter(CodingErrorAction.REPORT);
+                        .onMalformedInput(CodingErrorAction.REPORT)
+                        .onUnmappableCharacter(CodingErrorAction.REPORT);
                 while (bytes.remaining() > 0) {
                     CoderResult res = cd.decode(bytes, chars, true);
                     if (res.isMalformed() || res.isUnmappable()) {
@@ -154,9 +143,8 @@ public class BinaryGuesser {
         return false;
     }
 
-    
+
     /**
-     * 
      * @param name current file name.
      * @return whether given name is binary.
      */
@@ -165,26 +153,28 @@ public class BinaryGuesser {
     }
 
     /**
-     * @return Is a file by that name a known non-binary file?
      * @param name current file name.
+     * @return Is a file by that name a known non-binary file?
      */
     public static final boolean isNonBinary(final String name) {
-        if (name == null) {return false;}
+        if (name == null) {
+            return false;
+        }
         return extensionMatches(name.toUpperCase(Locale.US),
-                                BinaryGuesser.NON_BINARY_EXTENSIONS);
+                BinaryGuesser.NON_BINARY_EXTENSIONS);
     }
 
     /**
-     * @return Is a file by that name an executable/binary file?
      * @param name current file name.
+     * @return Is a file by that name an executable/binary file?
      */
     public static final boolean isExecutable(final String name) {
         return name.equals(BinaryGuesser.JAVA) || extensionMatches(name, EXE_EXTENSIONS)
-            || containsExtension(name, EXE_EXTENSIONS);
+                || containsExtension(name, EXE_EXTENSIONS);
     }
 
     public static boolean containsExtension(final String name,
-                                             final String[] exts) {
+                                            final String[] exts) {
         for (int i = 0; i < exts.length; i++) {
             if (name.indexOf(DOT + exts[i] + DOT) >= 0) {
                 return true;
@@ -194,7 +184,7 @@ public class BinaryGuesser {
     }
 
     public static boolean extensionMatches(final String name,
-                                            final String[] exts) {
+                                           final String[] exts) {
         for (int i = 0; i < exts.length; i++) {
             if (name.endsWith(DOT + exts[i])) {
                 return true;
@@ -214,140 +204,142 @@ public class BinaryGuesser {
     public static final boolean isKeystore(final String name) {
         return BinaryGuesser.extensionMatches(name, KEYSTORE_EXTENSIONS);
     }
-    
+
     /**
-     * @return Is a file by that name a known binary file?
      * @param name file name.
+     * @return Is a file by that name a known binary file?
      */
     public static final boolean isBinary(final String name) {
-        if (name == null) {return false;}
+        if (name == null) {
+            return false;
+        }
         String normalisedName = GuessUtils.normalise(name);
-        return BinaryGuesser.JAR_MANIFEST.equals(name) || BinaryGuesser.isImage(normalisedName)
-            || BinaryGuesser.isKeystore(normalisedName) || BinaryGuesser.isBytecode(normalisedName)
-            || BinaryGuesser.isBinaryData(normalisedName) || BinaryGuesser.isExecutable(normalisedName);
+        return BinaryGuesser.JAR_MANIFEST.equalsIgnoreCase(name) || BinaryGuesser.isImage(normalisedName)
+                || BinaryGuesser.isKeystore(normalisedName) || BinaryGuesser.isBytecode(normalisedName)
+                || BinaryGuesser.isBinaryData(normalisedName) || BinaryGuesser.isExecutable(normalisedName);
     }
 
     private static final String[] DATA_EXTENSIONS = {
-        "DAT", "DOC",
-        "NCB", "IDB",
-        "SUO", "XCF",
-        "RAJ", "CERT",
-        "KS", "TS",
-        "ODP",
+            "DAT", "DOC",
+            "NCB", "IDB",
+            "SUO", "XCF",
+            "RAJ", "CERT",
+            "KS", "TS",
+            "ODP", "SWF"
     };
 
     private static final String[] EXE_EXTENSIONS = {
-        "EXE", "DLL",
-        "LIB", "SO",
-        "A", "EXP",
+            "EXE", "DLL",
+            "LIB", "SO",
+            "A", "EXP",
     };
 
     private static final String[] KEYSTORE_EXTENSIONS = {
-        "JKS", "KEYSTORE", "PEM", "CRL", "TRUSTSTORE"
+            "JKS", "KEYSTORE", "PEM", "CRL", "TRUSTSTORE"
     };
 
     private static final String[] IMAGE_EXTENSIONS = {
-        "PNG", "PDF",
-        "GIF", "GIFF",
-        "TIF", "TIFF",
-        "JPG", "JPEG",
-        "ICO", "ICNS",
-        "PSD",
+            "PNG", "PDF",
+            "GIF", "GIFF",
+            "TIF", "TIFF",
+            "JPG", "JPEG",
+            "ICO", "ICNS",
+            "PSD",
     };
 
     private static final String[] BYTECODE_EXTENSIONS = {
-        "CLASS", "PYD",
-        "OBJ", "PYC",
+            "CLASS", "PYD",
+            "OBJ", "PYC",
     };
-    
+
     /**
      * Based on http://www.apache.org/dev/svn-eol-style.txt
      */
     private static final String[] NON_BINARY_EXTENSIONS = {
-        "AART",
-        "AC",
-        "AM",
-        "BAT",
-        "C",
-        "CAT",
-        "CGI",
-        "CLASSPATH",
-        "CMD",
-        "CONFIG",
-        "CPP",
-        "CSS",
-        "CWIKI",
-        "DATA",
-        "DCL",
-        "DTD",
-        "EGRM",
-        "ENT",
-        "FT", 
-        "FN",
-        "FV", 
-        "GRM",
-        "G",
-        "H",
-        "HTACCESS",
-        "HTML",
-        "IHTML",
-        "IN",
-        "JAVA",
-        "JMX", 
-        "JSP",
-        "JS",
-        "JUNIT",
-        "JX", 
-        "MANIFEST",
-        "M4",
-        "MF",
-        "MF",
-        "META",
-        "MOD",
-        "N3",
-        "PEN",
-        "PL",
-        "PM",
-        "POD",
-        "POM",
-        "PROJECT",
-        "PROPERTIES",
-        "PY",
-        "RB",
-        "RDF",
-        "RNC",
-        "RNG",
-        "RNX",
-        "ROLES",
-        "RSS",
-        "SH",
-        "SQL",
-        "SVG",
-        "TLD",
-        "TXT",
-        "TYPES",
-        "VM",
-        "VSL",
-        "WSDD",
-        "WSDL",
-        "XARGS",
-        "XCAT",
-        "XCONF",
-        "XEGRM",
-        "XGRM",
-        "XLEX",
-        "XLOG",
-        "XMAP",
-        "XML",
-        "XROLES",
-        "XSAMPLES",
-        "XSD",
-        "XSL",
-        "XSLT",
-        "XSP",
-        "XUL",
-        "XWEB",
-        "XWELCOME",
+            "AART",
+            "AC",
+            "AM",
+            "BAT",
+            "C",
+            "CAT",
+            "CGI",
+            "CLASSPATH",
+            "CMD",
+            "CONFIG",
+            "CPP",
+            "CSS",
+            "CWIKI",
+            "DATA",
+            "DCL",
+            "DTD",
+            "EGRM",
+            "ENT",
+            "FT",
+            "FN",
+            "FV",
+            "GRM",
+            "G",
+            "H",
+            "HTACCESS",
+            "HTML",
+            "IHTML",
+            "IN",
+            "JAVA",
+            "JMX",
+            "JSP",
+            "JS",
+            "JUNIT",
+            "JX",
+            "MANIFEST",
+            "M4",
+            "MF",
+            "MF",
+            "META",
+            "MOD",
+            "N3",
+            "PEN",
+            "PL",
+            "PM",
+            "POD",
+            "POM",
+            "PROJECT",
+            "PROPERTIES",
+            "PY",
+            "RB",
+            "RDF",
+            "RNC",
+            "RNG",
+            "RNX",
+            "ROLES",
+            "RSS",
+            "SH",
+            "SQL",
+            "SVG",
+            "TLD",
+            "TXT",
+            "TYPES",
+            "VM",
+            "VSL",
+            "WSDD",
+            "WSDL",
+            "XARGS",
+            "XCAT",
+            "XCONF",
+            "XEGRM",
+            "XGRM",
+            "XLEX",
+            "XLOG",
+            "XMAP",
+            "XML",
+            "XROLES",
+            "XSAMPLES",
+            "XSD",
+            "XSL",
+            "XSLT",
+            "XSP",
+            "XUL",
+            "XWEB",
+            "XWELCOME",
     };
     public static final String JAR_MANIFEST = "MANIFEST.MF";
     public static final String JAVA = "JAVA";
@@ -361,11 +353,10 @@ public class BinaryGuesser {
         // TODO: more efficient to move into standard analysis
         // TODO: then use binary as default
         return isBinary(document.getName())
-            ||
-            // try a taste
-            isBinaryDocument(document);
+                ||
+                // try a taste
+                isBinaryDocument(document);
     }
 
 
-
 }

Modified: creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
URL: http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java?rev=1680956&r1=1680955&r2=1680956&view=diff
==============================================================================
--- creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java (original)
+++ creadur/rat/trunk/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java Thu May 21 21:47:15 2015
@@ -25,53 +25,54 @@ import org.junit.Test;
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
+import java.util.List;
 
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 public class BinaryGuesserTest {
 
+    private static final List<String> BINARY_FILES = Arrays.asList(//
+            "image.png",//
+            "image.pdf",//
+            "image.psd",//
+            "image.gif",//
+            "image.giff",//
+            "image.jpg",//
+            "image.jpeg",//
+            "image.exe",//
+            "Whatever.class",//
+            "data.dat",//
+            "libicuda.so.34",//
+            "my.truststore",//
+            //"foo.Java", //
+            //"manifest.Mf",//
+            "deprecatedtechnology.swf"
+    );
+
+
     @Test
     public void testMatches() {
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.png")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.pdf")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.psd")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.gif")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.giff")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.tif")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.tiff")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.jpg")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.jpeg")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("image.exe")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("Whatever.class")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("data.dat")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("libicudata.so.34.")));
-        assertTrue(BinaryGuesser.isBinary(new MockDocument("my.truststore")));
+        for (String name : BINARY_FILES) {
+            assertTrue("'" + name + "' should be detected as a binary", BinaryGuesser.isBinary(new MockDocument(name)));
+        }
+
     }
 
+    @Test
     public void testIsBinary() {
-        assertTrue(BinaryGuesser.isBinary("image.png"));
-        assertTrue(BinaryGuesser.isBinary("image.pdf"));
-        assertTrue(BinaryGuesser.isBinary("image.psd"));
-        assertTrue(BinaryGuesser.isBinary("image.gif"));
-        assertTrue(BinaryGuesser.isBinary("image.giff"));
-        assertTrue(BinaryGuesser.isBinary("image.tif"));
-        assertTrue(BinaryGuesser.isBinary("image.tiff"));
-        assertTrue(BinaryGuesser.isBinary("image.jpg"));
-        assertTrue(BinaryGuesser.isBinary("image.jpeg"));
-        assertTrue(BinaryGuesser.isBinary("image.exe"));
-        assertTrue(BinaryGuesser.isBinary("Whatever.class"));
-        assertTrue(BinaryGuesser.isBinary("data.dat"));
-        assertTrue(BinaryGuesser.isBinary("libicudata.so.34."));
-        assertTrue(BinaryGuesser.isBinary("my.truststore"));
+        for (String name : BINARY_FILES) {
+            assertTrue("'" + name + "' should be detected as a binary", BinaryGuesser.isBinary(name));
+        }
     }
 
     /**
      * Used to swallow a MalformedInputException and return false
      * because the encoding of the stream was different from the
      * platform's default encoding.
-     * @throws Exception 
      *
+     * @throws Exception
      * @see "RAT-81"
      */
     @Test
@@ -91,13 +92,13 @@ public class BinaryGuesserTest {
             // still here?  can't test on this platform
             System.err.println("Skipping testBinaryWithMalformedInput");
         } catch (IOException e) {
-            if (r!= null) {
+            if (r != null) {
                 r.close();
             } else {
                 throw e; // could not open the second file
             }
             r = null;
-            assertTrue("Expected binary for "+ doc.getName(),BinaryGuesser.isBinary(doc));
+            assertTrue("Expected binary for " + doc.getName(), BinaryGuesser.isBinary(doc));
         } finally {
             if (r != null) {
                 r.close();
@@ -114,9 +115,9 @@ public class BinaryGuesserTest {
             assertTrue(isBinary);
         } else {
             if (isBinary) {
-                System.out.println("BinaryGuesserTest.realBinaryContent() succeeded when using encoding "+encoding);
+                System.out.println("BinaryGuesserTest.realBinaryContent() succeeded when using encoding " + encoding);
             } else {
-                System.err.println("BinaryGuesserTest.realBinaryContent() failed when using encoding "+encoding);
+                System.err.println("BinaryGuesserTest.realBinaryContent() failed when using encoding " + encoding);
             }
         }
     }