You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/04/12 06:06:27 UTC

svn commit: r1586813 - in /tika/trunk/tika-server/src: main/java/org/apache/tika/server/ test/java/org/apache/tika/server/

Author: nick
Date: Sat Apr 12 04:06:27 2014
New Revision: 1586813

URL: http://svn.apache.org/r1586813
Log:
TIKA-1270 Start on support for reporting the mimetypes that are known, still partly WIP

Added:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
Modified:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java?rev=1586813&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaMimeTypes.java Sat Apr 12 04:06:27 2014
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.Parser;
+import org.eclipse.jetty.util.ajax.JSON;
+
+/**
+ * JAX-RS resource that lists the media types known to the configured
+ * {@link TikaConfig}, as either JSON or plain text.
+ * TODO Reduce the remaining duplication by building structured info once
+ */
+@Path("/mime-types")
+public class TikaMimeTypes {
+    private final TikaConfig tika;
+    public TikaMimeTypes(TikaConfig tika) {
+        this.tika = tika;
+    }
+
+    /**
+     * Returns a JSON object keyed by media type name; each value holds
+     * "alias", plus "supertype" and "parser" when those are known.
+     */
+    @GET
+    @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
+    public String getMimeTypesJSON() {
+        Map<String,Object> details = new HashMap<String, Object>();
+        MediaTypeRegistry registry = tika.getMediaTypeRegistry();
+        Map<MediaType, Parser> parsers = ((CompositeParser)tika.getParser()).getParsers();
+
+        for (MediaType type : registry.getTypes()) {
+            Map<String,Object> typeDets = new HashMap<String, Object>();
+            typeDets.put("alias", registry.getAliases(type));
+            MediaType supertype = registry.getSupertype(type);
+            if (supertype != null && !MediaType.OCTET_STREAM.equals(supertype)) {
+                typeDets.put("supertype", supertype);
+            }
+            String parser = parserClassName(parsers, type);
+            if (parser != null) {
+                typeDets.put("parser", parser);
+            }
+            details.put(type.toString(), typeDets);
+        }
+        return JSON.toString(details);
+    }
+
+    /** Returns the same details as an indented plain text listing. */
+    @GET
+    @Produces("text/plain")
+    public String getMimeTypesPlain() {
+        StringBuilder text = new StringBuilder();
+        MediaTypeRegistry registry = tika.getMediaTypeRegistry();
+        Map<MediaType, Parser> parsers = ((CompositeParser)tika.getParser()).getParsers();
+
+        for (MediaType type : registry.getTypes()) {
+            text.append(type).append("\n");
+            for (MediaType alias : registry.getAliases(type)) {
+                text.append("  alias:     ").append(alias).append("\n");
+            }
+            MediaType supertype = registry.getSupertype(type);
+            if (supertype != null && !MediaType.OCTET_STREAM.equals(supertype)) {
+                text.append("  supertype: ").append(supertype).append("\n");
+            }
+            String parser = parserClassName(parsers, type);
+            if (parser != null) {
+                text.append("  parser:    ").append(parser).append("\n");
+            }
+        }
+        return text.toString();
+    }
+
+    /**
+     * Finds the class name of the parser registered for the type, looking
+     * one level inside a nested CompositeParser; null when there is none.
+     */
+    private static String parserClassName(Map<MediaType, Parser> parsers, MediaType type) {
+        Parser p = parsers.get(type);
+        if (p instanceof CompositeParser) {
+            p = ((CompositeParser)p).getParsers().get(type);
+        }
+        return (p == null) ? null : p.getClass().getName();
+    }
+}

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1586813&r1=1586812&r2=1586813&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Sat Apr 12 04:06:27 2014
@@ -90,7 +90,9 @@ public class TikaServerCli {
       TikaConfig tika = TikaConfig.getDefaultConfig();
 
       JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
-      sf.setResourceClasses(MetadataEP.class, MetadataResource.class, TikaResource.class, UnpackerResource.class, TikaVersion.class);
+      sf.setResourceClasses(MetadataEP.class, MetadataResource.class, 
+              TikaResource.class, UnpackerResource.class, 
+              TikaMimeTypes.class, TikaVersion.class);
 
       List<Object> providers = new ArrayList<Object>();
       providers.add(new TarWriter());
@@ -104,6 +106,7 @@ public class TikaServerCli {
       rProviders.add(new SingletonResourceProvider(new MetadataResource(tika)));
       rProviders.add(new SingletonResourceProvider(new TikaResource(tika)));
       rProviders.add(new SingletonResourceProvider(new UnpackerResource(tika)));
+      rProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika)));
       rProviders.add(new SingletonResourceProvider(new TikaVersion(tika)));
       sf.setResourceProviders(rProviders);
       

Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java?rev=1586813&r1=1586812&r2=1586813&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java Sat Apr 12 04:06:27 2014
@@ -17,6 +17,8 @@
 
 package org.apache.tika.server;
 
+import static org.junit.Assert.assertTrue;
+
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
@@ -31,13 +33,12 @@ import org.apache.commons.compress.archi
 import org.apache.commons.compress.archivers.ArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.commons.compress.utils.IOUtils;
 import org.apache.cxf.binding.BindingFactoryManager;
 import org.apache.cxf.endpoint.Server;
-import org.apache.cxf.io.CachedOutputStream;
 import org.apache.cxf.jaxrs.JAXRSBindingFactory;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.IOUtils;
 import org.junit.After;
 import org.junit.Before;
 
@@ -87,13 +88,22 @@ public abstract class CXFTestBase {
         server.destroy();
     }
 
-	protected String getStringFromInputStream(InputStream in) throws Exception {
-		CachedOutputStream bos = new CachedOutputStream();
-		IOUtils.copy(in, bos);
-		in.close();
-		bos.close();
-		return bos.getOut().toString();
-	}
+    /** Fails the test, echoing the haystack, unless it contains the needle. */
+    public static void assertContains(String needle, String haystack) {
+        assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
+    }
+
+    /**
+     * Reads the whole stream as UTF-8 text, closing it afterwards as the
+     * previous implementation did.
+     */
+    protected String getStringFromInputStream(InputStream in) throws Exception {
+        try {
+            return IOUtils.toString(in, "UTF-8");
+        } finally {
+            in.close();
+        }
+    }
 
 	protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException {
 		Map<String, String> data = new HashMap<String, String>();

Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java?rev=1586813&view=auto
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java (added)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java Sat Apr 12 04:06:27 2014
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+import java.util.Map;
+
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.eclipse.jetty.util.ajax.JSON;
+import org.junit.Test;
+
+/** Exercises the /mime-types resource in both of its output formats. */
+public class TikaMimeTypesTest extends CXFTestBase {
+   private static final String MIMETYPES_PATH = "/mime-types";
+
+   @Override
+   protected void setUpResources(JAXRSServerFactoryBean sf) {
+       sf.setResourceClasses(TikaMimeTypes.class);
+       TikaMimeTypes resource = new TikaMimeTypes(tika);
+       sf.setResourceProvider(TikaMimeTypes.class, new SingletonResourceProvider(resource));
+   }
+
+   @Override
+   protected void setUpProviders(JAXRSServerFactoryBean sf) {
+       // This resource is tested without any additional providers
+   }
+
+   /** Performs a GET with the given type as both Content-Type and Accept. */
+   private String fetch(String mediaType) throws Exception {
+       WebClient client = WebClient.create(endPoint + MIMETYPES_PATH);
+       Response response = client.type(mediaType).accept(mediaType).get();
+       return getStringFromInputStream((InputStream) response.getEntity());
+   }
+
+   /** The text listing names types along with their supertype and aliases. */
+   @Test
+   public void testGetPlainText() throws Exception {
+       String listing = fetch("text/plain");
+
+       String[] expected = {
+           "text/plain", "application/xml", "video/x-ogm",
+           "supertype: video/ogg", "alias:     image/bmp"
+       };
+       for (String fragment : expected) {
+           assertContains(fragment, listing);
+       }
+   }
+
+   /** The JSON variant maps each type name to its alias/supertype/parser. */
+   @Test
+   @SuppressWarnings("unchecked")
+   public void testGetJSON() throws Exception {
+       String body = fetch(javax.ws.rs.core.MediaType.APPLICATION_JSON);
+       Map<String,Map<String,Object>> json = (Map<String,Map<String,Object>>)JSON.parse(body);
+
+       String[] names = { "text/plain", "application/xml", "video/x-ogm", "image/x-ms-bmp" };
+       for (String name : names) {
+           assertEquals(true, json.containsKey(name));
+       }
+
+       // image/x-ms-bmp: exactly one alias, handled by the image parser
+       Map<String,Object> bmpInfo = json.get("image/x-ms-bmp");
+       assertEquals(true, bmpInfo.containsKey("alias"));
+       Object[] bmpAliases = (Object[])bmpInfo.get("alias");
+       assertEquals(1, bmpAliases.length);
+       assertEquals("image/bmp", bmpAliases[0]);
+       assertEquals("org.apache.tika.parser.image.ImageParser", bmpInfo.get("parser"));
+
+       Map<String,Object> ogmInfo = json.get("video/x-ogm");
+       assertEquals("video/ogg", ogmInfo.get("supertype"));
+       assertEquals("org.gagravarr.tika.OggParser", ogmInfo.get("parser"));
+   }
+}