You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/06/05 03:42:28 UTC
svn commit: r1600554 - in /tika/trunk: ./ src/site/apt/ tika-app/
tika-app/src/main/java/org/apache/tika/cli/ tika-serialization/
tika-serialization/src/ tika-serialization/src/main/
tika-serialization/src/main/java/ tika-serialization/src/main/java/or...
Author: tallison
Date: Thu Jun 5 01:42:27 2014
New Revision: 1600554
URL: http://svn.apache.org/r1600554
Log:
TIKA-1311 centralize serialization
Added:
tika/trunk/tika-serialization/
tika/trunk/tika-serialization/pom.xml
tika/trunk/tika-serialization/src/
tika/trunk/tika-serialization/src/main/
tika/trunk/tika-serialization/src/main/java/
tika/trunk/tika-serialization/src/main/java/org/
tika/trunk/tika-serialization/src/main/java/org/apache/
tika/trunk/tika-serialization/src/main/java/org/apache/tika/
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
tika/trunk/tika-serialization/src/test/
tika/trunk/tika-serialization/src/test/java/
tika/trunk/tika-serialization/src/test/java/org/
tika/trunk/tika-serialization/src/test/java/org/apache/
tika/trunk/tika-serialization/src/test/java/org/apache/tika/
tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/
tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/
tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
Modified:
tika/trunk/pom.xml
tika/trunk/src/site/apt/gettingstarted.apt
tika/trunk/tika-app/pom.xml
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
tika/trunk/tika-server/pom.xml
tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
Modified: tika/trunk/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/pom.xml (original)
+++ tika/trunk/pom.xml Thu Jun 5 01:42:27 2014
@@ -49,6 +49,7 @@
<module>tika-core</module>
<module>tika-parsers</module>
<module>tika-xmp</module>
+ <module>tika-serialization</module>
<module>tika-app</module>
<module>tika-bundle</module>
<module>tika-server</module>
Modified: tika/trunk/src/site/apt/gettingstarted.apt
URL: http://svn.apache.org/viewvc/tika/trunk/src/site/apt/gettingstarted.apt?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/src/site/apt/gettingstarted.apt (original)
+++ tika/trunk/src/site/apt/gettingstarted.apt Thu Jun 5 01:42:27 2014
@@ -56,6 +56,10 @@ Build artifacts
Tika parsers. Collection of classes that implement the Tika Parser
interface based on various external parser libraries.
+ [tika-serialization/target/tika-serialization-*.jar]
+ Serialization utilities. This is designed to centralize serialization
+ of common Tika objects. This package is used by tika-app and tika-server.
+
[tika-app/target/tika-app-*.jar]
Tika application. Combines the above components and all the external
parser libraries into a single runnable jar with a GUI and a command
@@ -65,6 +69,14 @@ Build artifacts
Tika bundle. An OSGi bundle that combines tika-parsers with non-OSGified
parser libraries to make them easy to deploy in an OSGi environment.
+ [tika-server/target/tika-server-*.jar]
+ Tika server. Tika server uses the Apache CXF framework and provides
+ an implementation of JAX-RS for Java. The tika server component builds
+ to a standalone package in Tika. See
+ {{{http://wiki.apache.org/tika/TikaJAXRS}the wiki}}
+ for information on running this server.
+
+
Using Tika as a Maven dependency
The core library, tika-core, contains the key interfaces and classes of Tika
Modified: tika/trunk/tika-app/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-app/pom.xml (original)
+++ tika/trunk/tika-app/pom.xml Thu Jun 5 01:42:27 2014
@@ -45,6 +45,11 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
<artifactId>tika-xmp</artifactId>
<version>${project.version}</version>
</dependency>
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Thu Jun 5 01:42:27 2014
@@ -45,9 +45,6 @@ import javax.xml.transform.sax.SAXTransf
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.BasicConfigurator;
@@ -75,6 +72,7 @@ import org.apache.tika.io.json.JsonMetad
import org.apache.tika.language.LanguageProfilerBuilder;
import org.apache.tika.language.ProfilingHandler;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.mime.MimeTypeException;
@@ -900,29 +898,25 @@ public class TikaCLI {
}
}
- /**
- * Uses GSON.
- */
private class NoDocumentJSONMetHandler extends DefaultHandler {
- private final Gson gson;
-
protected final Metadata metadata;
protected PrintWriter writer;
- public NoDocumentJSONMetHandler(Metadata metadata, PrintWriter writer){
+ public NoDocumentJSONMetHandler(Metadata metadata, PrintWriter writer) {
this.metadata = metadata;
this.writer = writer;
- GsonBuilder builder = new GsonBuilder();
- builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer());
- gson = builder.create();
}
@Override
public void endDocument() throws SAXException {
- gson.toJson(metadata, writer);
+ try {
+ JsonMetadata.toJson(metadata, writer);
writer.flush();
- }
- }
+ } catch (TikaException e) {
+ throw new SAXException(e);
+ }
+ }
+ }
}
Added: tika/trunk/tika-serialization/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/pom.xml?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/pom.xml (added)
+++ tika/trunk/tika-serialization/pom.xml Thu Jun 5 01:42:27 2014
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parent</artifactId>
+ <version>1.6-SNAPSHOT</version>
+ <relativePath>../tika-parent/pom.xml</relativePath>
+ </parent>
+
+ <artifactId>tika-serialization</artifactId>
+ <name>Apache Tika serialization</name>
+ <url>http://tika.apache.org</url>
+
+ <dependencies>
+ <!-- Optional OSGi dependency, used only when running within OSGi -->
+
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.core</artifactId>
+ <version>4.0.0</version>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>1.7.1</version>
+ </dependency>
+
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+
+ <organization>
+ <name>The Apache Software Foundation</name>
+ <url>http://www.apache.org</url>
+ </organization>
+  <scm>
+    <url>http://svn.apache.org/viewvc/tika/trunk/tika-serialization</url>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-serialization</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-serialization</developerConnection>
+  </scm>
+ <issueManagement>
+ <system>JIRA</system>
+ <url>https://issues.apache.org/jira/browse/TIKA</url>
+ </issueManagement>
+ <ciManagement>
+ <system>Jenkins</system>
+ <url>https://builds.apache.org/job/Tika-trunk/</url>
+ </ciManagement>
+
+</project>
Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java Thu Jun 5 01:42:27 2014
@@ -0,0 +1,86 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonIOException;
+
+public class JsonMetadata {
+
+    private static Gson GSON;
+    // default Gson: registers the Metadata serializer and deserializer; replaceable via setGson
+    static {
+        GsonBuilder builder = new GsonBuilder();
+        builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer());
+        builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer());
+        GSON = builder.create();
+    }
+
+
+    /**
+     * Serializes a Metadata object to Json. This does not flush or close the writer.
+     *
+     * @param metadata metadata to write
+     * @param writer writer
+     * @throws TikaException if Gson reports an IO failure while writing; the Gson exception is the cause
+     */
+    public static void toJson(Metadata metadata, Writer writer) throws TikaException {
+        try {
+            GSON.toJson(metadata, writer);
+        } catch (JsonIOException e) {
+            throw new TikaException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Read metadata from reader.
+     *
+     * @param reader reader to read from
+     * @return Metadata or null if nothing could be read from the reader
+     * @throws TikaException on Gson parse failure or IO failure with Reader; the Gson exception is the cause
+     */
+    public static Metadata fromJson(Reader reader) throws TikaException {
+        Metadata m = null;
+        try {
+            m = GSON.fromJson(reader, Metadata.class);
+        } catch (com.google.gson.JsonParseException e){
+            //covers both io and parse exceptions; keep e as the cause so its stack trace is not lost
+            throw new TikaException(e.getMessage(), e);
+        }
+        return m;
+    }
+
+    /**
+     * Enables setting custom configurations on Gson. Remember to register
+     * a serializer and a deserializer for Metadata. This does a literal set
+     * and does not add the default serializer and deserializers.
+     *
+     * @param gson the Gson instance that toJson and fromJson use from now on
+     */
+    public static void setGson(Gson gson) {
+        GSON = gson;
+    }
+}
Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java Thu Jun 5 01:42:27 2014
@@ -0,0 +1,75 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Type;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+
+
+/**
+ * Deserializer for Metadata
+ *
+ * If overriding this, remember that this is called from a static context.
+ * Share state only with great caution.
+ */
+public class JsonMetadataDeserializer implements JsonDeserializer<Metadata> {
+
+    /**
+     * Deserializes a json object (equivalent to: Map<String, String[]>)
+     * into a Metadata object.
+     *
+     * @param element to deserialize; must be a json object
+     * @param type (ignored)
+     * @param context (ignored)
+     * @return Metadata
+     * @throws JsonParseException if element is not a json object
+     */
+    @Override
+    public Metadata deserialize(JsonElement element, Type type,
+            JsonDeserializationContext context) throws JsonParseException {
+        if (element == null || !element.isJsonObject()) {
+            // fail with the documented type rather than the unchecked exception from getAsJsonObject()
+            throw new JsonParseException("Metadata must be deserialized from a json object");
+        }
+        Metadata m = new Metadata();
+        for (Map.Entry<String, JsonElement> entry : element.getAsJsonObject().entrySet()) {
+            String key = entry.getKey();
+            JsonElement v = entry.getValue();
+            if (v.isJsonPrimitive()) {
+                m.set(key, v.getAsString());
+            } else if (v.isJsonArray()) {
+                // multi-valued key: add every item, in array order
+                for (JsonElement valueItem : v.getAsJsonArray()) {
+                    m.add(key, valueItem.getAsString());
+                }
+            }
+            // a value that is neither primitive nor array is skipped
+        }
+        return m;
+    }
+}
Added: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java (added)
+++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java Thu Jun 5 01:42:27 2014
@@ -0,0 +1,97 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import java.lang.reflect.Type;
+import java.util.Arrays;
+
+import org.apache.tika.metadata.Metadata;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonNull;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+
+
+/**
+ * Serializer for Metadata
+ *
+ * If overriding this, remember that this is called from a static context.
+ * Share state only with great caution.
+ *
+ */
+public class JsonMetadataSerializer implements JsonSerializer<Metadata> {
+
+    /**
+     * Turns a Metadata object into a json object, effectively a
+     * Map<String, String[]>: a key holding exactly one value is written as a
+     * plain string property, any other key as an array of strings.
+     *
+     * @param metadata object to serialize
+     * @param type (ignored)
+     * @param context (ignored)
+     * @return json object of key/value(s) pairs, or JsonNull if metadata
+     *         (or the name list from getNames) is null
+     */
+    @Override
+    public JsonElement serialize(Metadata metadata, Type type, JsonSerializationContext context) {
+        final String[] keys = (metadata == null) ? null : getNames(metadata);
+        if (keys == null) {
+            return new JsonNull();
+        }
+        final JsonObject json = new JsonObject();
+        for (final String key : keys) {
+            final String[] values = metadata.getValues(key);
+            if (values == null) {
+                //nothing to write for this key
+                continue;
+            }
+            if (values.length != 1) {
+                json.add(key, toJsonArray(values));
+            } else {
+                json.addProperty(key, values[0]);
+            }
+        }
+        return json;
+    }
+
+    /**
+     * Supplies the names to serialize, sorted by natural String order.
+     * Override to get a custom sort order or to filter names.
+     *
+     * @param metadata metadata from which to grab names
+     * @return names in the order in which they should be serialized
+     */
+    protected String[] getNames(Metadata metadata) {
+        final String[] sorted = metadata.names();
+        Arrays.sort(sorted);
+        return sorted;
+    }
+
+    /** Wraps each value, in order, as a json string primitive. */
+    private static JsonArray toJsonArray(String[] values) {
+        final JsonArray array = new JsonArray();
+        for (final String value : values) {
+            array.add(new JsonPrimitive(value));
+        }
+        return array;
+    }
+}
Added: tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java?rev=1600554&view=auto
==============================================================================
--- tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java (added)
+++ tika/trunk/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java Thu Jun 5 01:42:27 2014
@@ -0,0 +1,113 @@
+package org.apache.tika.metadata.serialization;
+
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import static org.junit.Assert.*;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+public class JsonMetadataTest {
+
+    /** Round trip of multi-valued, duplicate, non-ASCII and escape-heavy values. */
+    @Test
+    public void testBasicSerializationAndDeserialization() throws Exception {
+        Metadata original = new Metadata();
+        original.add("k1", "v1");
+        original.add("k1", "v2");
+        //same value added twice under one key
+        original.add("k3", "v3");
+        original.add("k3", "v3");
+        //number containing a comma
+        original.add("k4", "500,000");
+        //Chinese characters
+        original.add("alma_mater", "\u666E\u6797\u65AF\u987F\u5927\u5B66");
+        //url with query string
+        original.add("url", "/myApp/myAction.html?method=router&cmd=1");
+        //html markup
+        original.add("html", "<html><body>& </body></html>");
+        //characters that json must escape
+        original.add("json_escapes", "the: \"quick\" brown, fox");
+
+        Metadata copy = roundTrip(original);
+        assertEquals(7, copy.names().length);
+        assertEquals(original, copy);
+
+        //the Chinese value must come back as exactly 6 characters
+        assertEquals(6, copy.get("alma_mater").length());
+    }
+
+    /** Malformed json must surface as a TikaException. */
+    @Test
+    public void testDeserializationException() {
+        //malformed json; 500,000 should be in quotes
+        String json = "{\"k1\":[\"v1\",\"v2\"],\"k3\":\"v3\",\"k4\":500,000}";
+        try {
+            JsonMetadata.fromJson(new StringReader(json));
+            fail();
+        } catch (TikaException expected) {
+            //this is the required outcome
+        }
+    }
+
+    /** Serializing null must not throw and must write nothing. */
+    @Test
+    public void testNull() {
+        StringWriter out = new StringWriter();
+        try {
+            JsonMetadata.toJson(null, out);
+        } catch (TikaException e) {
+            fail();
+        }
+        assertEquals("", out.toString());
+    }
+
+    /** Round trip of 100000 distinct single-valued keys. */
+    @Test
+    public void testLargeNumberOfKeys() throws Exception {
+        Metadata original = new Metadata();
+        for (int key = 0; key < 100000; key++) {
+            original.set(Integer.toString(key), "val_" + key);
+        }
+        assertEquals(original, roundTrip(original));
+    }
+
+    /** Round trip of two values of 1000000 characters each. */
+    @Test
+    public void testLargeValues() throws Exception {
+        StringBuilder large = new StringBuilder();
+        for (int i = 0; i < 1000000; i++) {
+            large.append("v");
+        }
+        Metadata original = new Metadata();
+        original.add("large_value1", large.toString());
+        original.add("large_value2", large.toString());
+        assertEquals(original, roundTrip(original));
+    }
+
+    /** Serializes to an in-memory string and reads it straight back. */
+    private static Metadata roundTrip(Metadata metadata) throws TikaException {
+        StringWriter out = new StringWriter();
+        JsonMetadata.toJson(metadata, out);
+        return JsonMetadata.fromJson(new StringReader(out.toString()));
+    }
+}
Modified: tika/trunk/tika-server/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Thu Jun 5 01:42:27 2014
@@ -34,6 +34,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.0</version>
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/JSONMessageBodyWriter.java Thu Jun 5 01:42:27 2014
@@ -17,9 +17,9 @@
package org.apache.tika.server;
-import org.apache.tika.io.IOUtils;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-import org.eclipse.jetty.util.ajax.JSON;
+import org.apache.tika.metadata.serialization.JsonMetadata;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
@@ -30,11 +30,10 @@ import javax.ws.rs.ext.Provider;
import java.io.IOException;
import java.io.OutputStream;
-import java.io.StringReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
-import java.util.Map;
-import java.util.TreeMap;
@Provider
@Produces(MediaType.APPLICATION_JSON)
@@ -52,22 +51,13 @@ public class JSONMessageBodyWriter imple
public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
WebApplicationException {
-
- Map<String, Object> res = new TreeMap<String, Object>();
-
- for (String name : metadata.names()) {
- String[] values = metadata.getValues(name);
- if (metadata.isMultiValued(name)) {
- res.put(name, values);
- } else {
- res.put(name, values[0]);
- }
- }
-
- String json = JSON.toString(res);
- System.err.println("JSON : "+json);
- StringReader r = new StringReader(json);
- IOUtils.copy(r, entityStream);
- entityStream.flush();
+ try {
+ Writer writer = new OutputStreamWriter(entityStream, "UTF-8");
+ JsonMetadata.toJson(metadata, writer);
+ writer.flush();
+ } catch (TikaException e) {
+ throw new IOException(e);
+ }
+ entityStream.flush();
}
}
Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java?rev=1600554&r1=1600553&r2=1600554&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java Thu Jun 5 01:42:27 2014
@@ -39,7 +39,8 @@ import javax.ws.rs.core.Response.Status;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.tika.io.IOUtils;
-import org.eclipse.jetty.util.ajax.JSON;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
import org.junit.Assert;
import org.junit.Test;
@@ -105,8 +106,7 @@ public class MetadataEPTest extends CXFT
Assert.assertEquals(Status.OK.getStatusCode(), response.getStatus());
Reader reader = new InputStreamReader((InputStream) response.getEntity());
- Map<?, ?> metadata = (Map<?, ?>) JSON.parse(reader);
-
+ Metadata metadata = JsonMetadata.fromJson(reader);
assertNotNull(metadata.get("Author"));
assertEquals("Maxim Valyanskiy", metadata.get("Author"));
}
@@ -129,7 +129,7 @@ public class MetadataEPTest extends CXFT
Assert.assertEquals(Status.OK.getStatusCode(), response.getStatus());
Reader reader = new InputStreamReader((InputStream) response.getEntity());
- Map<?, ?> metadata = (Map<?, ?>) JSON.parse(reader);
+ Metadata metadata = JsonMetadata.fromJson(reader);
assertNotNull(metadata.get("Author"));
assertEquals("Maxim Valyanskiy", metadata.get("Author"));