You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/15 18:59:41 UTC
[tika] 04/05: TIKA-3134 -- fix bug and add unit tests
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit e57c832a56b7917ff6da01af129c909aaa2ccf69
Author: tallison <ta...@apache.org>
AuthorDate: Wed Jul 15 14:27:28 2020 -0400
TIKA-3134 -- fix bug and add unit tests
---
.../server/resource/RecursiveMetadataResource.java | 18 +++----
.../tika/server/RecursiveMetadataResourceTest.java | 61 ++++++++++++++++++++--
2 files changed, 66 insertions(+), 13 deletions(-)
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 15aca64..07d20c5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -139,17 +139,17 @@ public class RecursiveMetadataResource {
TikaResource.fillParseContext(context, httpHeaders, null);
TikaResource.logRequest(LOG, info, metadata);
- int writeLimit = -1;
- if (httpHeaders.containsKey("writeLimit")) {
- writeLimit = Integer.parseInt(httpHeaders.getFirst("writeLimit"));
- }
+ int writeLimit = -1;
+ if (httpHeaders.containsKey("writeLimit")) {
+ writeLimit = Integer.parseInt(httpHeaders.getFirst("writeLimit"));
+ }
- int maxEmbeddedResources = -1;
- if (httpHeaders.containsKey("maxEmbeddedResources")) {
- writeLimit = Integer.parseInt(httpHeaders.getFirst("maxEmbeddedResources"));
- }
+ int maxEmbeddedResources = -1;
+ if (httpHeaders.containsKey("maxEmbeddedResources")) {
+ maxEmbeddedResources = Integer.parseInt(httpHeaders.getFirst("maxEmbeddedResources"));
+ }
- BasicContentHandlerFactory.HANDLER_TYPE type =
+ BasicContentHandlerFactory.HANDLER_TYPE type =
BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE);
RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
new BasicContentHandlerFactory(type, writeLimit), maxEmbeddedResources);
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
index d43b741..a65efdc 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
@@ -18,6 +18,7 @@
package org.apache.tika.server;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.tika.TikaTest.assertNotContained;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
@@ -25,8 +26,6 @@ import static org.junit.Assert.assertTrue;
import javax.ws.rs.core.Response;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
@@ -34,12 +33,10 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -323,4 +320,60 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
assertNull(metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
}
+ @Test
+ public void testEmbeddedResourceLimit() throws Exception { // checks that the "maxEmbeddedResources" request header changes the number of Metadata entries returned
+ for (int i : new int[]{0,1,5}) { // i is the limit sent in the header on this iteration
+ Response response = WebClient
+ .create(endPoint + META_PATH)
+ .accept("application/json")
+ .header("maxEmbeddedResources", Integer.toString(i))
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+ assertEquals(200, response.getStatus());
+ // Check results: parse the JSON response body into a list of Metadata objects
+ Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+ List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+ assertEquals(i+1, metadataList.size()); // presumably one entry for the container document plus i embedded resources -- TODO confirm
+ }
+ }
+
+ @Test
+ public void testWriteLimit() throws Exception { // exercises the "writeLimit" request header, first with a limit of 10 and then with 100
+ int writeLimit = 10;
+ Response response = WebClient
+ .create(endPoint + META_PATH)
+ .accept("application/json")
+ .header("writeLimit", Integer.toString(writeLimit))
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+ assertEquals(200, response.getStatus());
+ // Check results: with a limit of 10 a single Metadata entry is expected, flagged WRITE_LIMIT_REACHED
+ Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+ List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+ assertEquals(1, metadataList.size());
+ assertEquals("true", metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.WRITE_LIMIT_REACHED));
+
+ // Now try with a write limit of 100; the same request is repeated with only the header value changed
+ writeLimit = 100;
+ response = WebClient
+ .create(endPoint + META_PATH)
+ .accept("application/json")
+ .header("writeLimit", Integer.toString(writeLimit))
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+ assertEquals(200, response.getStatus());
+ // Check results: 12 entries are expected, and entry 6 is the one flagged WRITE_LIMIT_REACHED
+ reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+ metadataList = JsonMetadataList.fromJson(reader);
+ assertEquals(12, metadataList.size());
+ assertEquals("true", metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.WRITE_LIMIT_REACHED));
+ assertContains("When in the Course of human events it becomes necessary for one people", // entry 6 must contain this opening phrase
+ metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+ assertNotContained("to dissolve", // ...but not this phrase; presumably it falls past the write limit -- TODO confirm
+ metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+
+ }
}