You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/07/05 22:36:33 UTC

svn commit: r1608130 - in /nutch: branches/2.x/ branches/2.x/src/java/org/apache/nutch/util/ branches/2.x/src/test/org/apache/nutch/util/ branches/2.x/src/testresources/test-mime-util/ trunk/ trunk/src/java/org/apache/nutch/util/ trunk/src/test/org/apache/nutch/util/ trunk/src/testresources/test-mime-util/

Author: snagel
Date: Sat Jul  5 20:36:33 2014
New Revision: 1608130

URL: http://svn.apache.org/r1608130
Log:
NUTCH-1605 MIME type detector recognizes xlsx as zip file

Added:
    nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java   (with props)
    nutch/branches/2.x/src/testresources/test-mime-util/
    nutch/branches/2.x/src/testresources/test-mime-util/test.xlsx   (with props)
    nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java   (with props)
    nutch/trunk/src/testresources/test-mime-util/
    nutch/trunk/src/testresources/test-mime-util/test.xlsx   (with props)
Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1608130&r1=1608129&r2=1608130&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Jul  5 20:36:33 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
+
 * NUTCH-385 Improve description of thread related configuration for Fetcher (jnioche,lufeng)
 
 * NUTCH-1798 Crawl script not calling index command correctly (Aaron Bedward via jnioche)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java?rev=1608130&r1=1608129&r2=1608130&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java Sat Jul  5 20:36:33 2014
@@ -19,13 +19,16 @@ package org.apache.nutch.util;
 
 // JDK imports
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
 
 // Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 // Tika imports
 import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
@@ -128,10 +131,10 @@ public final class MimeUtil {
    * strategies available within Tika. First, the mime type provided in
    * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
    * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
-   * found, then that mime type is used, otherwise URL resolution is
-   * used to try and determine the mime type. If that means is unsuccessful, and
-   * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
+   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
+   * is found, then that mime type is used, otherwise URL resolution is
+   * used to try and determine the mime type. However, if
+   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
    * then mime type magic resolution is used to try and obtain a
    * better-than-the-default approximation of the {@link MimeType}.
    * 
@@ -145,24 +148,19 @@ public final class MimeUtil {
    */
   public String autoResolveContentType(String typeName, String url, byte[] data) {
     String retType = null;
-    String magicType = null;
     MimeType type = null;
     String cleanedMimeType = null;
 
-    try {
-      cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
-          .forName(MimeUtil.cleanMimeType(typeName)).getName()
-          : null;
-    } catch (MimeTypeException mte) {
-      // Seems to be a malformed mime type name...
-    }
-
+    cleanedMimeType = MimeUtil.cleanMimeType(typeName);
     // first try to get the type from the cleaned type name
-    try {
-      type = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType)
-          : null;
-    } catch (MimeTypeException e) {
-      type = null;
+    if (cleanedMimeType != null) {
+      try {
+        type = mimeTypes.forName(cleanedMimeType);
+        cleanedMimeType = type.getName();
+      } catch (MimeTypeException mte) {
+        // Seems to be a malformed mime type name...
+        cleanedMimeType = null;
+      }
     }
 
     // if returned null, or if it's the default type then try url resolution
@@ -172,8 +170,6 @@ public final class MimeUtil {
       // mime-type, then guess a mime-type from the url pattern
 
       try {
-        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
-        Tika tika = new Tika(tikaConfig);
         retType = tika.detect(url) != null ? tika.detect(url) : null;
       } catch (Exception e) {
         String message = "Problem loading default Tika configuration";
@@ -189,10 +185,21 @@ public final class MimeUtil {
     // if it is, and it's not the default mime type, then go with the mime type
     // returned by the magic
     if (this.mimeMagic) {
-      magicType = tika.detect(data);
+      String magicType = null;
+      // pass URL (file name) and (cleansed) content type from protocol to Tika
+      Metadata tikaMeta = new Metadata();
+      tikaMeta.add(Metadata.RESOURCE_NAME_KEY, url);
+      tikaMeta.add(Metadata.CONTENT_TYPE,
+          (cleanedMimeType != null ? cleanedMimeType : typeName));
+      try {
+        InputStream stream = TikaInputStream.get(data);
+        try {
+          magicType = tika.detect(stream, tikaMeta);
+       } finally {
+         stream.close();
+        }
+      } catch (IOException ignore) {}
 
-      // Deprecated in Tika 1.0 See https://issues.apache.org/jira/browse/NUTCH-1230
-      //MimeType magicType = this.mimeTypes.getMimeType(data);
       if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
           && !magicType.equals(MimeTypes.PLAIN_TEXT)
           && retType != null && !retType.equals(magicType)) {

Added: nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java?rev=1608130&view=auto
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java (added)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java Sat Jul  5 20:36:33 2014
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.io.Files;
+
+import junit.framework.TestCase;
+
+public class TestMimeUtil extends TestCase {
+
+  public static String urlPrefix = "http://localhost/";
+
+  private static Charset defaultCharset = Charset.forName("UTF-8");
+
+  private File sampleDir = new File(System.getProperty("test.build.data", "."),
+      "test-mime-util");
+
+  /** Test data: each "test page" entry consists of the following elements:
+   * <ol>
+   * <li>MIME type</li>
+   * <li>file name (last URL path element)</li>
+   * <li>Content-Type (HTTP header)</li>
+   * <li>content: if empty, do not test MIME magic</li>
+   * </ol>
+   */
+  public static String[][] textBasedFormats = {
+      {
+          "text/html",
+          "test.html",
+          "text/html; charset=utf-8",
+          "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
+              + "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
+              + "<html>\n<head>\n"
+              + "<meta http-equiv=Content-Type content=\"text/html; charset=utf-8\" />\n"
+              + "</head>\n<body>Hello, World!</body></html>" },
+      {
+          "text/html",
+          "test.html",
+          "", // no Content-Type in HTTP header => test URL pattern
+          "<!DOCTYPE html>\n<html>\n<head>\n"
+              + "</head>\n<body>Hello, World!</body></html>" },
+      {
+          "application/xhtml+xml",
+          "test.html",
+          "application/xhtml+xml; charset=utf-8",
+          "<?xml version=\"1.0\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+              + "<html>\n<head>\n"
+              + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
+              + "</head>\n<body>Hello, World!</body></html>" }
+    };
+
+  public static String[][] binaryFiles = {
+    {
+      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+      "test.xlsx",
+      "" }
+    };
+
+  private String getMimeType(String url, File file, String contentType,
+      boolean useMagic) throws IOException {
+    return getMimeType(url, Files.toByteArray(file), contentType, useMagic);
+  }
+
+  private String getMimeType(String url, byte[] bytes, String contentType,
+      boolean useMagic) {
+    Configuration conf = NutchConfiguration.create();
+    conf.setBoolean("mime.type.magic", useMagic);
+    MimeUtil mimeUtil = new MimeUtil(conf);
+    return mimeUtil.autoResolveContentType(contentType, url, bytes);
+  }
+
+  /** use HTTP Content-Type, URL pattern, and MIME magic */
+  public void testWithMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix,
+          testPage[3].getBytes(defaultCharset), testPage[2], true);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** use only HTTP Content-Type (if given) and URL pattern */
+  public void testWithoutMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix + testPage[1],
+          testPage[3].getBytes(defaultCharset), testPage[2], false);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** use only MIME magic (detection from content bytes) */
+  public void testOnlyMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix,
+          testPage[3].getBytes(defaultCharset), "", true);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** test binary file formats (real files) */
+  public void testBinaryFiles() throws IOException {
+    for (String[] testPage : binaryFiles) {
+      File dataFile = new File(sampleDir, testPage[1]);
+      String mimeType = getMimeType(urlPrefix + testPage[1],
+          dataFile, testPage[2], false);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+}

Propchange: nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/branches/2.x/src/testresources/test-mime-util/test.xlsx
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/testresources/test-mime-util/test.xlsx?rev=1608130&view=auto
==============================================================================
Binary file - no diff available.

Propchange: nutch/branches/2.x/src/testresources/test-mime-util/test.xlsx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608130&r1=1608129&r2=1608130&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Jul  5 20:36:33 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
+
 * NUTCH-1802 Move TestbedProxy to test environment (jnioche)
 
 * NUTCH-1803 Put test dependencies in a separate lib dir (jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=1608130&r1=1608129&r2=1608130&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Sat Jul  5 20:36:33 2014
@@ -19,13 +19,16 @@ package org.apache.nutch.util;
 
 // JDK imports
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
 
 // Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 // Tika imports
 import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
@@ -128,10 +131,10 @@ public final class MimeUtil {
    * strategies available within Tika. First, the mime type provided in
    * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
    * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
-   * found, then that mime type is used, otherwise URL resolution is
-   * used to try and determine the mime type. If that means is unsuccessful, and
-   * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
+   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
+   * is found, then that mime type is used, otherwise URL resolution is
+   * used to try and determine the mime type. However, if
+   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
    * then mime type magic resolution is used to try and obtain a
    * better-than-the-default approximation of the {@link MimeType}.
    * 
@@ -145,24 +148,19 @@ public final class MimeUtil {
    */
   public String autoResolveContentType(String typeName, String url, byte[] data) {
     String retType = null;
-    String magicType = null;
     MimeType type = null;
     String cleanedMimeType = null;
 
-    try {
-      cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
-          .forName(MimeUtil.cleanMimeType(typeName)).getName()
-          : null;
-    } catch (MimeTypeException mte) {
-      // Seems to be a malformed mime type name...
-    }
-
+    cleanedMimeType = MimeUtil.cleanMimeType(typeName);
     // first try to get the type from the cleaned type name
-    try {
-      type = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType)
-          : null;
-    } catch (MimeTypeException e) {
-      type = null;
+    if (cleanedMimeType != null) {
+      try {
+        type = mimeTypes.forName(cleanedMimeType);
+        cleanedMimeType = type.getName();
+      } catch (MimeTypeException mte) {
+        // Seems to be a malformed mime type name...
+        cleanedMimeType = null;
+      }
     }
 
     // if returned null, or if it's the default type then try url resolution
@@ -171,8 +169,6 @@ public final class MimeUtil {
       // If no mime-type header, or cannot find a corresponding registered
       // mime-type, then guess a mime-type from the url pattern
       try {
-        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
-        Tika tika = new Tika(tikaConfig);
         retType = tika.detect(url) != null ? tika.detect(url) : null;
       } catch (Exception e) {
         String message = "Problem loading default Tika configuration";
@@ -188,10 +184,21 @@ public final class MimeUtil {
     // if it is, and it's not the default mime type, then go with the mime type
     // returned by the magic
     if (this.mimeMagic) {
-      magicType = tika.detect(data);
+      String magicType = null;
+      // pass URL (file name) and (cleansed) content type from protocol to Tika
+      Metadata tikaMeta = new Metadata();
+      tikaMeta.add(Metadata.RESOURCE_NAME_KEY, url);
+      tikaMeta.add(Metadata.CONTENT_TYPE,
+          (cleanedMimeType != null ? cleanedMimeType : typeName));
+      try {
+        InputStream stream = TikaInputStream.get(data);
+        try {
+          magicType = tika.detect(stream, tikaMeta);
+       } finally {
+         stream.close();
+        }
+      } catch (IOException ignore) {}
 
-      // Deprecated in Tika 1.0 See https://issues.apache.org/jira/browse/NUTCH-1230
-      //MimeType magicType = this.mimeTypes.getMimeType(data);
       if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
           && !magicType.equals(MimeTypes.PLAIN_TEXT)
           && retType != null && !retType.equals(magicType)) {

Added: nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java?rev=1608130&view=auto
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java (added)
+++ nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java Sat Jul  5 20:36:33 2014
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.io.Files;
+
+import junit.framework.TestCase;
+
+public class TestMimeUtil extends TestCase {
+
+  public static String urlPrefix = "http://localhost/";
+
+  private static Charset defaultCharset = Charset.forName("UTF-8");
+
+  private File sampleDir = new File(System.getProperty("test.build.data", "."),
+      "test-mime-util");
+
+  /** Test data: each "test page" entry consists of the following elements:
+   * <ol>
+   * <li>MIME type</li>
+   * <li>file name (last URL path element)</li>
+   * <li>Content-Type (HTTP header)</li>
+   * <li>content: if empty, do not test MIME magic</li>
+   * </ol>
+   */
+  public static String[][] textBasedFormats = {
+      {
+          "text/html",
+          "test.html",
+          "text/html; charset=utf-8",
+          "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
+              + "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
+              + "<html>\n<head>\n"
+              + "<meta http-equiv=Content-Type content=\"text/html; charset=utf-8\" />\n"
+              + "</head>\n<body>Hello, World!</body></html>" },
+      {
+          "text/html",
+          "test.html",
+          "", // no Content-Type in HTTP header => test URL pattern
+          "<!DOCTYPE html>\n<html>\n<head>\n"
+              + "</head>\n<body>Hello, World!</body></html>" },
+      {
+          "application/xhtml+xml",
+          "test.html",
+          "application/xhtml+xml; charset=utf-8",
+          "<?xml version=\"1.0\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+              + "<html>\n<head>\n"
+              + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
+              + "</head>\n<body>Hello, World!</body></html>" }
+    };
+
+  public static String[][] binaryFiles = {
+    {
+      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+      "test.xlsx",
+      "" }
+    };
+
+  private String getMimeType(String url, File file, String contentType,
+      boolean useMagic) throws IOException {
+    return getMimeType(url, Files.toByteArray(file), contentType, useMagic);
+  }
+
+  private String getMimeType(String url, byte[] bytes, String contentType,
+      boolean useMagic) {
+    Configuration conf = NutchConfiguration.create();
+    conf.setBoolean("mime.type.magic", useMagic);
+    MimeUtil mimeUtil = new MimeUtil(conf);
+    return mimeUtil.autoResolveContentType(contentType, url, bytes);
+  }
+
+  /** use HTTP Content-Type, URL pattern, and MIME magic */
+  public void testWithMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix,
+          testPage[3].getBytes(defaultCharset), testPage[2], true);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** use only HTTP Content-Type (if given) and URL pattern */
+  public void testWithoutMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix + testPage[1],
+          testPage[3].getBytes(defaultCharset), testPage[2], false);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** use only MIME magic (detection from content bytes) */
+  public void testOnlyMimeMagic() {
+    for (String[] testPage : textBasedFormats) {
+      String mimeType = getMimeType(urlPrefix,
+          testPage[3].getBytes(defaultCharset), "", true);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+  /** test binary file formats (real files) */
+  public void testBinaryFiles() throws IOException {
+    for (String[] testPage : binaryFiles) {
+      File dataFile = new File(sampleDir, testPage[1]);
+      String mimeType = getMimeType(urlPrefix + testPage[1],
+          dataFile, testPage[2], false);
+      assertEquals("", testPage[0], mimeType);
+    }
+  }
+
+}

Propchange: nutch/trunk/src/test/org/apache/nutch/util/TestMimeUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/testresources/test-mime-util/test.xlsx
URL: http://svn.apache.org/viewvc/nutch/trunk/src/testresources/test-mime-util/test.xlsx?rev=1608130&view=auto
==============================================================================
Binary file - no diff available.

Propchange: nutch/trunk/src/testresources/test-mime-util/test.xlsx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream