You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2018/03/14 17:35:18 UTC

[tika] 04/04: Keep all implemented and unit test

This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 12a98b63babc8515177d6f0e3df17ae8912142ee
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Wed Mar 14 17:35:01 2018 +0000

    Keep all implemented and unit test
---
 .../parser/multiple/AbstractMultipleParser.java    | 16 ++++++++++++++--
 .../tika/parser/multiple/MultipleParserTest.java   | 22 +++++++++++++++++++++-
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index ece2b8d..8f896b2 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -22,6 +22,7 @@ import static org.apache.tika.utils.ParserUtils.recordParserFailure;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
@@ -326,8 +327,19 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                     // Most recent (last) parser has already won
                     continue;
                 case KEEP_ALL:
-                    // TODO Find unique values to add
-                    // TODO Implement
+                    // Start with old list, then add any new unique values
+                    List<String> vals = new ArrayList<>(Arrays.asList(oldVals));
+                    newMetadata.remove(n);
+                    for (String oldVal : oldVals) {
+                        newMetadata.add(n, oldVal);
+                    }
+                    for (String newVal : newVals) {
+                        if (! vals.contains(newVal)) {
+                            newMetadata.add(n, newVal);
+                            vals.add(newVal);
+                        }
+                    }
+                    
                     continue;
                 }
             }
diff --git a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
index 3d77e9d..590c95d 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java
@@ -220,7 +220,27 @@ public class MultipleParserTest {
         assertEquals(EmptyParser.class.getName(), usedParsers[3]);
         
         
-        // TODO Implement then check the Merge policies
+        // Merge
+        p = new SupplementingParser(null, MetadataPolicy.KEEP_ALL, pFail, 
+                                    pContent1, pContent2, pNothing);
+
+        metadata = new Metadata();
+        handler = new BodyContentHandler();
+        p.parse(new ByteArrayInputStream(new byte[] {0,1,2,3,4}), handler, metadata, context);
+        assertEquals("Fell back 1!Fell back 2!", handler.toString());
+
+        assertEquals("Test1", metadata.get("T1"));
+        assertEquals("Test2", metadata.get("T2"));
+        assertEquals(2, metadata.getValues("TBoth").length);
+        assertEquals("Test1", metadata.getValues("TBoth")[0]);
+        assertEquals("Test2", metadata.getValues("TBoth")[1]);
+
+        usedParsers = metadata.getValues("X-Parsed-By");
+        assertEquals(4, usedParsers.length);
+        assertEquals(ErrorParser.class.getName(), usedParsers[0]);
+        assertEquals(DummyParser.class.getName(), usedParsers[1]);
+        assertEquals(DummyParser.class.getName(), usedParsers[2]);
+        assertEquals(EmptyParser.class.getName(), usedParsers[3]);
 
         
         // Check the error details always come through, no matter the policy

-- 
To stop receiving notification emails like this one, please contact
nick@apache.org.