You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/06/05 03:42:28 UTC

svn commit: r1600554 - in /tika/trunk: ./ src/site/apt/ tika-app/ tika-app/src/main/java/org/apache/tika/cli/ tika-serialization/ tika-serialization/src/ tika-serialization/src/main/ tika-serialization/src/main/java/ tika-serialization/src/main/java/or...

Author: tallison
Date: Thu Jun  5 01:42:27 2014
New Revision: 1600554

URL: http://svn.apache.org/r1600554
Log:
TIKA-1311 centralize serialization

Added:
    tika/trunk/tika-serialization/
    tika/trunk/tika-serialization/pom.xml
    tika/trunk/tika-serialization/src/
    tika/trunk/tika-serialization/src/main/
    tika/trunk/tika-serialization/src/main/java/
    tika/trunk/tika-serialization/src/main/java/org/
    tika/trunk/tika-serialization/src/main/java/org/apache/
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
    tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
    tika/trunk/tika-serialization/src/test/
    tika/trunk/tika-serialization/src/test/java/
    tika/trunk/tika-serialization/src/test/java/org/
    tika/trunk/tika-serialization/src/test/java/org/apache/
    tika/trunk/tika-serialization/src/test/java/org/apache/tika/
    tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/
    tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/
    tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
Modified:
    tika/trunk/pom.xml
    tika/trunk/src/site/apt/gettingstarted.apt
    tika/trunk/tika-app/pom.xml
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-server/pom.xml
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java

Modified: tika/trunk/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/pom.xml (original)
+++ tika/trunk/pom.xml Thu Jun  5 01:42:27 2014
@@ -49,6 +49,7 @@
     <module>tika-core</module>
     <module>tika-parsers</module>
     <module>tika-xmp</module>
+    <module>tika-serialization</module>
     <module>tika-app</module>
     <module>tika-bundle</module>
     <module>tika-server</module>

Modified: tika/trunk/src/site/apt/gettingstarted.apt
URL: http://svn.apache.org/viewvc/tika/trunk/src/site/apt/gettingstarted.apt?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/src/site/apt/gettingstarted.apt (original)
+++ tika/trunk/src/site/apt/gettingstarted.apt Thu Jun  5 01:42:27 2014
@@ -56,6 +56,10 @@ Build artifacts
   Tika parsers. Collection of classes that implement the Tika Parser
   interface based on various external parser libraries.
 
+ [tika-serialization/target/tika-serialization-*.jar]
+  Serialization utilities. This is designed to centralize serialization
+  of common Tika objects. This package is used by tika-app and tika-server.
+
  [tika-app/target/tika-app-*.jar]
   Tika application. Combines the above components and all the external
   parser libraries into a single runnable jar with a GUI and a command
@@ -65,6 +69,14 @@ Build artifacts
   Tika bundle. An OSGi bundle that combines tika-parsers with non-OSGified
   parser libraries to make them easy to deploy in an OSGi environment.
 
+ [tika-server/target/tika-server-*.jar]
+  Tika server. Tika server uses the Apache CXF framework and provides
+  an implementation of JAX-RS for Java. The tika server component builds
+  to a standalone package in Tika. See 
+  {{{http://wiki.apache.org/tika/TikaJAXRS}the wiki}} 
+  for information on running this server.
+
+ 
 Using Tika as a Maven dependency
 
  The core library, tika-core, contains the key interfaces and classes of Tika

Modified: tika/trunk/tika-app/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-app/pom.xml (original)
+++ tika/trunk/tika-app/pom.xml Thu Jun  5 01:42:27 2014
@@ -45,6 +45,11 @@
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
+      <artifactId>tika-serialization</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
       <artifactId>tika-xmp</artifactId>
       <version>${project.version}</version>
     </dependency>

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Thu Jun  5 01:42:27 2014
@@ -45,9 +45,6 @@ import javax.xml.transform.sax.SAXTransf
 import javax.xml.transform.sax.TransformerHandler;
 import javax.xml.transform.stream.StreamResult;
 
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.log4j.BasicConfigurator;
@@ -75,6 +72,7 @@ import org.apache.tika.io.json.JsonMetad
 import org.apache.tika.language.LanguageProfilerBuilder;
 import org.apache.tika.language.ProfilingHandler;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypeException;
@@ -900,29 +898,25 @@ public class TikaCLI {
         }
     }
 
-    /**
-     * Uses GSON. 
-     */
     private class NoDocumentJSONMetHandler extends DefaultHandler {
 
-        private final Gson gson;
-
         protected final Metadata metadata;
         
         protected PrintWriter writer;
 
-        public NoDocumentJSONMetHandler(Metadata metadata, PrintWriter writer){
+        public NoDocumentJSONMetHandler(Metadata metadata, PrintWriter writer) {
             this.metadata = metadata;
             this.writer = writer;
-            GsonBuilder builder = new GsonBuilder();
-            builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer());
-            gson = builder.create();
         }
         
         @Override
         public void endDocument() throws SAXException {
-                gson.toJson(metadata, writer);
+            try {
+                JsonMetadata.toJson(metadata, writer);
                 writer.flush();
-        }   
-    }    
+            } catch (TikaException e) {
+                throw new SAXException(e);
+            }
+        }        
+    }
 }

Added: tika/trunk/tika-serialization/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/pom.xml?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/pom.xml (added)
+++ tika/trunk/tika-serialization/pom.xml Thu Jun  5 01:42:27 2014
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one 
+  or more contributor license agreements. See the NOTICE file 
+  distributed with this work for additional information 
+  regarding copyright ownership. The ASF licenses this file 
+  to you under the Apache License, Version 2.0 (the 
+  "License"); you may not use this file except in compliance 
+  with the License. You may obtain a copy of the License at 
+  
+    http://www.apache.org/licenses/LICENSE-2.0 
+
+  Unless required by applicable law or agreed to in writing, 
+  software distributed under the License is distributed on an 
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
+  KIND, either express or implied. See the License for the 
+  specific language governing permissions and limitations 
+  under the License. 
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parent</artifactId>
+    <version>1.6-SNAPSHOT</version>
+    <relativePath>../tika-parent/pom.xml</relativePath>
+  </parent>
+
+  <artifactId>tika-serialization</artifactId>
+  <name>Apache Tika serialization</name>
+  <url>http://tika.apache.org</url>
+
+  <dependencies>
+  <!-- Optional OSGi dependency, used only when running within OSGi -->
+
+    <dependency>
+      <groupId>org.osgi</groupId>
+      <artifactId>org.osgi.core</artifactId>
+      <version>4.0.0</version>
+      <scope>provided</scope>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>1.7.1</version>
+    </dependency>
+
+    <!-- Test dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+
+  <organization>
+    <name>The Apache Software Foundation</name>
+    <url>http://www.apache.org</url>
+  </organization>
+  <scm>
+    <url>http://svn.apache.org/viewvc/tika/trunk/tika-serialization</url>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-serialization</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-serialization</developerConnection>
+  </scm>
+  <issueManagement>
+    <system>JIRA</system>
+    <url>https://issues.apache.org/jira/browse/TIKA</url>
+  </issueManagement>
+  <ciManagement>
+    <system>Jenkins</system>
+    <url>https://builds.apache.org/job/Tika-trunk/</url>
+  </ciManagement>
+
+</project>

Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java Thu Jun  5 01:42:27 2014
@@ -0,0 +1,86 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonIOException;
+
+public class JsonMetadata {
+    // volatile so that an instance installed via setGson() is visible to threads calling toJson/fromJson
+    private static volatile Gson GSON;
+    
+    static {
+        GsonBuilder builder = new GsonBuilder();
+        builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer());
+        builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer());
+        GSON = builder.create();
+    }
+
+    
+    /**
+     * Serializes a Metadata object to Json.  This does not flush or close the writer.
+     * 
+     * @param metadata metadata to write
+     * @param writer writer
+     * @throws TikaException if Gson reports an IO failure while writing; the Gson exception is kept as the cause
+     */
+    public static void toJson(Metadata metadata, Writer writer) throws TikaException {
+        try {
+            GSON.toJson(metadata, writer);
+        } catch (JsonIOException e) {
+            throw new TikaException(e.getMessage(), e);
+        }
+    }
+        
+    /**
+     * Read metadata from reader.
+     *
+     * @param reader reader to read from
+     * @return Metadata or null if nothing could be read from the reader
+     * @throws TikaException in case of parse failure by Gson or IO failure with Reader; the Gson exception is kept as the cause
+     */
+    public static Metadata fromJson(Reader reader) throws TikaException {
+        Metadata m = null;
+        try {
+            m = GSON.fromJson(reader, Metadata.class);
+        } catch (com.google.gson.JsonParseException e){
+            //covers both io and parse exceptions
+            throw new TikaException(e.getMessage(), e);
+        }
+        return m;
+    }
+    
+    /**
+     * Enables setting custom configurations on Gson.  Remember to register
+     * a serializer and a deserializer for Metadata.  This does a literal set
+     * and does not add the default serializer and deserializers.
+     * 
+     * @param gson Gson instance to use, as-is, for all subsequent toJson/fromJson calls
+     */
+    public static void setGson(Gson gson) {
+        GSON = gson;
+    }
+}

Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java Thu Jun  5 01:42:27 2014
@@ -0,0 +1,75 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Type;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+
+
+/**
+ * Deserializer for Metadata
+ *
+ * If overriding this, remember that this is called from a static context.
+ * Share state only with great caution.
+ */
+public class JsonMetadataDeserializer implements JsonDeserializer<Metadata> {
+
+    /**
+     * Builds a Metadata object from a json object whose values are either
+     * a single primitive or an array (equivalent to: Map<String, String[]>).
+     * Values of any other kind are skipped.
+     * 
+     * @param element json object to deserialize
+     * @param type (ignored)
+     * @param context (ignored)
+     * @return Metadata populated from the entries of element
+     * @throws JsonParseException if element is not able to be parsed
+     */
+    @Override
+    public Metadata deserialize(JsonElement element, Type type,
+            JsonDeserializationContext context) throws JsonParseException {
+
+        final JsonObject source = element.getAsJsonObject();
+        Metadata result = new Metadata();
+        for (Map.Entry<String, JsonElement> pair : source.entrySet()) {
+            final String name = pair.getKey();
+            final JsonElement value = pair.getValue();
+            if (value.isJsonArray()) {
+                // multi-valued: append each item in array order
+                for (JsonElement item : value.getAsJsonArray()) {
+                    result.add(name, item.getAsString());
+                }
+            } else if (value.isJsonPrimitive()) {
+                // single-valued
+                result.set(name, value.getAsString());
+            }
+            // anything that is neither an array nor a primitive is skipped
+        }
+        return result;
+    }
+}

Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java Thu Jun  5 01:42:27 2014
@@ -0,0 +1,97 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import java.lang.reflect.Type;
+import java.util.Arrays;
+
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonNull;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+
+
+/**
+ * Serializer for Metadata
+ * 
+ * If overriding this, remember that this is called from a static context.
+ * Share state only with great caution.
+ *
+ */
+public class JsonMetadataSerializer implements JsonSerializer<Metadata> {
+
+
+    /**
+     * Converts a Metadata object into a json object, effectively
+     * {@code Map<String, String[]>}: a name with exactly one value maps to a
+     * string, any other name maps to an array of its values.
+     * 
+     * @param metadata object to serialize
+     * @param type (ignored)
+     * @param context (ignored)
+     * @return JsonElement with key/value(s) pairs or JsonNull if metadata (or the list of names) is null.
+     */
+    @Override
+    public JsonElement serialize(Metadata metadata, Type type, JsonSerializationContext context) {
+        final String[] orderedNames = (metadata == null) ? null : getNames(metadata);
+        if (orderedNames == null) {
+            // either there is no metadata or getNames() supplied no names
+            return new JsonNull();
+        }
+
+        final JsonObject json = new JsonObject();
+
+        for (String name : orderedNames) {
+            final String[] values = metadata.getValues(name);
+            if (values == null) {
+                //silently skip
+                continue;
+            }
+            if (values.length != 1) {
+                // zero or several values: emit an array
+                JsonArray array = new JsonArray();
+                for (String value : values) {
+                    array.add(new JsonPrimitive(value));
+                }
+                json.add(name, array);
+            } else {
+                // exactly one value: emit a plain string property
+                json.addProperty(name, values[0]);
+            }
+        }
+        return json;
+    }
+    
+    /**
+     * Supplies the names to serialize, sorted in natural String order.
+     * Override to get a custom sort order or to filter names.
+     * 
+     * @param metadata metadata from which to grab names
+     * @return list of names in the order in which they should be serialized
+     */
+    protected String[] getNames(Metadata metadata) {
+        final String[] sorted = metadata.names();
+        Arrays.sort(sorted);
+        return sorted;
+    }
+}

Added: tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java (added)
+++ tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java Thu Jun  5 01:42:27 2014
@@ -0,0 +1,113 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import static org.junit.Assert.*;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+public class JsonMetadataTest {
+
+    @Test
+    public void testBasicSerializationAndDeserialization() throws Exception {
+        Metadata original = new Metadata();
+        original.add("k1", "v1");
+        original.add("k1", "v2");
+        //test duplicate value
+        original.add("k3", "v3");
+        original.add("k3", "v3");
+        //test numeral with comma
+        original.add("k4", "500,000");
+        //test Chinese
+        original.add("alma_mater", "\u666E\u6797\u65AF\u987F\u5927\u5B66");
+        //test url
+        original.add("url", "/myApp/myAction.html?method=router&cmd=1");
+        //simple html entities
+        original.add("html", "<html><body>&amp;&nbsp;</body></html>");
+        //simple json escape chars
+        original.add("json_escapes", "the: \"quick\" brown, fox");
+
+        Metadata restored = roundTrip(original);
+        assertEquals(7, restored.names().length);
+        assertEquals(original, restored);
+
+        //test that this really is 6 Chinese characters
+        assertEquals(6, restored.get("alma_mater").length());
+    }
+
+    @Test
+    public void testDeserializationException() {
+        //malformed json; 500,000 should be in quotes
+        String malformed = "{\"k1\":[\"v1\",\"v2\"],\"k3\":\"v3\",\"k4\":500,000}";
+        try {
+            JsonMetadata.fromJson(new StringReader(malformed));
+            fail();
+        } catch (TikaException expected) {
+            //this is the required outcome
+        }
+    }
+
+    @Test
+    public void testNull() {
+        StringWriter sink = new StringWriter();
+        try {
+            JsonMetadata.toJson(null, sink);
+        } catch (TikaException e) {
+            fail();
+        }
+        //null metadata must produce no output at all
+        assertEquals("", sink.toString());
+    }
+
+    @Test
+    public void testLargeNumberOfKeys() throws Exception {
+        Metadata original = new Metadata();
+        for (int key = 0; key < 100000; key++) {
+            original.set(Integer.toString(key), "val_" + key);
+        }
+        assertEquals(original, roundTrip(original));
+    }
+
+    @Test
+    public void testLargeValues() throws Exception {
+        //one million characters, stored under two different keys
+        StringBuilder big = new StringBuilder(1000000);
+        for (int i = 0; i < 1000000; i++) {
+            big.append('v');
+        }
+        String largeValue = big.toString();
+        Metadata original = new Metadata();
+        original.add("large_value1", largeValue);
+        original.add("large_value2", largeValue);
+        assertEquals(original, roundTrip(original));
+    }
+
+    /**
+     * Serializes the metadata to a json string and reads it straight back.
+     */
+    private static Metadata roundTrip(Metadata metadata) throws TikaException {
+        StringWriter writer = new StringWriter();
+        JsonMetadata.toJson(metadata, writer);
+        return JsonMetadata.fromJson(new StringReader(writer.toString()));
+    }
+}

Modified: tika/trunk/tika-server/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Thu Jun  5 01:42:27 2014
@@ -34,6 +34,11 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-serialization</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>net.sf.opencsv</groupId>
       <artifactId>opencsv</artifactId>
       <version>2.0</version>

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java Thu Jun  5 01:42:27 2014
@@ -17,9 +17,9 @@
 
 package org.apache.tika.server;
 
-import org.apache.tika.io.IOUtils;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
-import org.eclipse.jetty.util.ajax.JSON;
+import org.apache.tika.metadata.serialization.JsonMetadata;
 
 import javax.ws.rs.Produces;
 import javax.ws.rs.WebApplicationException;
@@ -30,11 +30,10 @@ import javax.ws.rs.ext.Provider;
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.StringReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Type;
-import java.util.Map;
-import java.util.TreeMap;
 
 @Provider
 @Produces(MediaType.APPLICATION_JSON)
@@ -52,22 +51,13 @@ public class JSONMessageBodyWriter imple
   public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
       MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
       WebApplicationException {
-
-      Map<String, Object> res = new TreeMap<String, Object>();
-
-    for (String name : metadata.names()) {
-      String[] values = metadata.getValues(name);
-      if (metadata.isMultiValued(name)) {
-        res.put(name, values);
-      } else {
-        res.put(name, values[0]);
-      }
-    }
-
-    String json = JSON.toString(res);
-    System.err.println("JSON : "+json);
-    StringReader r = new StringReader(json);
-    IOUtils.copy(r, entityStream);
-    entityStream.flush();
+        try {
+            Writer writer = new OutputStreamWriter(entityStream, "UTF-8");
+            JsonMetadata.toJson(metadata, writer);
+            writer.flush();
+        } catch (TikaException e) {
+            throw new IOException(e);
+        }
+        entityStream.flush();
   }
 }

Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java Thu Jun  5 01:42:27 2014
@@ -39,7 +39,8 @@ import javax.ws.rs.core.Response.Status;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.tika.io.IOUtils;
-import org.eclipse.jetty.util.ajax.JSON;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -105,8 +106,7 @@ public class MetadataEPTest extends CXFT
     Assert.assertEquals(Status.OK.getStatusCode(), response.getStatus());
 
     Reader reader = new InputStreamReader((InputStream) response.getEntity());
-    Map<?, ?> metadata = (Map<?, ?>) JSON.parse(reader);
-
+    Metadata metadata = JsonMetadata.fromJson(reader);
     assertNotNull(metadata.get("Author"));
     assertEquals("Maxim Valyanskiy", metadata.get("Author"));
   }
@@ -129,7 +129,7 @@ public class MetadataEPTest extends CXFT
     Assert.assertEquals(Status.OK.getStatusCode(), response.getStatus());
 
     Reader reader = new InputStreamReader((InputStream) response.getEntity());
-    Map<?, ?> metadata = (Map<?, ?>) JSON.parse(reader);
+    Metadata metadata = JsonMetadata.fromJson(reader);
 
     assertNotNull(metadata.get("Author"));
     assertEquals("Maxim Valyanskiy", metadata.get("Author"));