You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by hw...@apache.org on 2010/08/04 05:08:03 UTC
svn commit: r982108 - in /labs/mouse: guesser/ guesser/__init__.py
guesser/binary.py tests/test_mouse.py
Author: hwright
Date: Wed Aug 4 03:08:03 2010
New Revision: 982108
URL: http://svn.apache.org/viewvc?rev=982108&view=rev
Log:
Add a module and some tests to Mouse which help determine if a file is binary
or not. There is still some work to do, but this is a good start.
Added:
labs/mouse/guesser/ (with props)
labs/mouse/guesser/__init__.py
labs/mouse/guesser/binary.py
Modified:
labs/mouse/tests/test_mouse.py
Propchange: labs/mouse/guesser/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Aug 4 03:08:03 2010
@@ -0,0 +1 @@
+*.pyc
Added: labs/mouse/guesser/__init__.py
URL: http://svn.apache.org/viewvc/labs/mouse/guesser/__init__.py?rev=982108&view=auto
==============================================================================
--- labs/mouse/guesser/__init__.py (added)
+++ labs/mouse/guesser/__init__.py Wed Aug 4 03:08:03 2010
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+'''A module to help Mouse guess the different kinds of files.
+
+Helper methods are defined here so that consumers only need import this
+module, rather than all the child modules.'''
+
+import binary
+
+def is_binary(item):
+ return binary.is_binary(item)
Added: labs/mouse/guesser/binary.py
URL: http://svn.apache.org/viewvc/labs/mouse/guesser/binary.py?rev=982108&view=auto
==============================================================================
--- labs/mouse/guesser/binary.py (added)
+++ labs/mouse/guesser/binary.py Wed Aug 4 03:08:03 2010
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+'''Module to determine if a file with given content is binary.'''
+
+import os
+import mimetypes
+
+
+_data_exts = [
+ 'dat', 'doc', 'ncb', 'idb', 'suo', 'xcf', 'raj', 'cert', 'ks', 'ts', 'odp',
+ ]
+
+_exec_exts = [
+ 'exe', 'dll', 'lib', 'so', 'a', 'exp',
+ ]
+
+_keystore_exts = [
+ 'jks', 'keystore', 'pem', 'crl',
+ ]
+
+_image_exts = [
+ 'png', 'pdf', 'gif', 'giff', 'tif', 'tiff', 'jpg', 'jpeg', 'ico', 'icns',
+ ]
+
+_bytecode_exts = [
+ 'class', 'pyd', 'obj', 'pyc',
+ ]
+
+_binary_exts = _data_exts + _exec_exts + _keystore_exts + _image_exts + \
+ _bytecode_exts
+
+
+def is_binary(item):
+ '''Entry method, will return True if ITEM is thought to be binary,
+ False otherwise.'''
+
+ # First, try the mime-type
+ (type, encoding) = mimetypes.guess_type(item.name)
+ print '%s : %s ' % (item.name, type)
+ if type and type.split('/')[0] in ('image', 'application'):
+ return True
+
+ # Now, manually look at file extensions
+ (root, ext) = os.path.splitext(item.name)
+ if ext[1:] in _binary_exts:
+ return True
+
+ # Special case: we still think it is binary if it contains an executable
+ # extension in the filename, not just at the end
+ for ext in _exec_exts:
+ if ('.' + ext + '.') in item.name:
+ return True
+
+ # Time to attempt a brute-force divination
Modified: labs/mouse/tests/test_mouse.py
URL: http://svn.apache.org/viewvc/labs/mouse/tests/test_mouse.py?rev=982108&r1=982107&r2=982108&view=diff
==============================================================================
--- labs/mouse/tests/test_mouse.py (original)
+++ labs/mouse/tests/test_mouse.py Wed Aug 4 03:08:03 2010
@@ -32,6 +32,7 @@ sys.path.append(os.path.dirname(sys.path
import mouse
import sources
+import guesser
data_path = 'data'
resources_path = os.path.join(os.path.dirname(sys.path[0]), 'resources')
@@ -128,6 +129,22 @@ class TestReport(unittest.TestCase):
# self._check_plain_output('rat-tests', 'rat-tests')
+class TestBinaryGuessing(unittest.TestCase):
+ '''Test the various ways we tell if something is a binary file.
+ This set of tests is largely stolen from RAT's testsuite.'''
+
+ _names = [ 'image.png', 'image.pdf', 'image.gif', 'image.giff', 'image.tif',
+ 'image.tiff', 'image.jpg', 'image.jpeg', 'image.exe', 'Whatever.class',
+ 'data.dat', 'libicudata.so.34.' ]
+
+ def test_is_binary(self):
+ for name in self._names:
+ # Don't bother giving the item contents, since we shouldn't ever
+ # have to check the content, anyway
+ self.assertTrue(guesser.is_binary(sources.Item(name, None)))
+
+
+
class TestFilters(unittest.TestCase):
'Test filtering various files and patterns'
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org