You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/14 17:35:18 UTC
[tika] 04/04: Keep all implemented and unit test
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 12a98b63babc8515177d6f0e3df17ae8912142ee
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Wed Mar 14 17:35:01 2018 +0000
Keep all implemented and unit test
---
.../parser/multiple/AbstractMultipleParser.java | 16 ++++++++++++++--
.../tika/parser/multiple/MultipleParserTest.java | 22 +++++++++++++++++++++-
2 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index ece2b8d..8f896b2 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -22,6 +22,7 @@ import static org.apache.tika.utils.ParserUtils.recordParserFailure;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
@@ -326,8 +327,19 @@ public abstract class AbstractMultipleParser extends AbstractParser {
// Most recent (last) parser has already won
continue;
case KEEP_ALL:
- // TODO Find unique values to add
- // TODO Implement
+ // Start with old list, then add any new unique values
+ List<String> vals = new ArrayList<>(Arrays.asList(oldVals));
+ newMetadata.remove(n);
+ for (String oldVal : oldVals) {
+ newMetadata.add(n, oldVal);
+ }
+ for (String newVal : newVals) {
+ if (! vals.contains(newVal)) {
+ newMetadata.add(n, newVal);
+ vals.add(newVal);
+ }
+ }
+
continue;
}
}
diff --git a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
index 3d77e9d..590c95d 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
@@ -220,7 +220,27 @@ public class MultipleParserTest {
assertEquals(EmptyParser.class.getName(), usedParsers[3]);
- // TODO Implement then check the Merge policies
+ // Merge
+ p = new SupplementingParser(null, MetadataPolicy.KEEP_ALL, pFail,
+ pContent1, pContent2, pNothing);
+
+ metadata = new Metadata();
+ handler = new BodyContentHandler();
+ p.parse(new ByteArrayInputStream(new byte[] {0,1,2,3,4}), handler, metadata, context);
+ assertEquals("Fell back 1!Fell back 2!", handler.toString());
+
+ assertEquals("Test1", metadata.get("T1"));
+ assertEquals("Test2", metadata.get("T2"));
+ assertEquals(2, metadata.getValues("TBoth").length);
+ assertEquals("Test1", metadata.getValues("TBoth")[0]);
+ assertEquals("Test2", metadata.getValues("TBoth")[1]);
+
+ usedParsers = metadata.getValues("X-Parsed-By");
+ assertEquals(4, usedParsers.length);
+ assertEquals(ErrorParser.class.getName(), usedParsers[0]);
+ assertEquals(DummyParser.class.getName(), usedParsers[1]);
+ assertEquals(DummyParser.class.getName(), usedParsers[2]);
+ assertEquals(EmptyParser.class.getName(), usedParsers[3]);
// Check the error details always come through, no matter the policy
--
To stop receiving notification emails like this one, please contact
nick@apache.org.