You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/06/02 13:52:44 UTC

[tika] 02/12: TIKA-3094 -- new metadata for every parse :(

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 098256bd8eaba266f959c3478c7c9812dbf6e114
Author: tballison <ta...@apache.org>
AuthorDate: Tue May 5 10:42:12 2020 -0400

    TIKA-3094 -- create a new Metadata object for every parse :(
---
 .../src/test/java/org/apache/tika/bundle/BundleIT.java    | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
index 2cab1d5..517aa0a 100644
--- a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -317,29 +317,18 @@ public class BundleIT {
         Parser parser = tika.getParser();
         ParseContext context = new ParseContext();
         context.set(Parser.class, parser);
-        Metadata metadata = new Metadata();
         Set<String> needToFix = new HashSet<>();
         needToFix.add("testAccess2_encrypted.accdb");
-
-        Set<String> unknownProblem = new HashSet<>();
-        //these all trigger org.apache.tika.metadata.PropertyTypeException
-        //which for some reason we can't catch (?!)
-        //We don't see problems with these files in tika-parsers?!
-/*        unknownProblem.add("testPPT_embedded_two_slides.pptx");
-        unknownProblem.add("testWORD_multi_authors.docx");
-        unknownProblem.add("testEXCEL_embeded.xlsx");
-        unknownProblem.add("testVORBIS.ogg");
-        unknownProblem.add("testWORD_2006ml.docx");
-        unknownProblem.add("testRTFEmbeddedLink.rtf");*/
         System.out.println(getTestDir());
         for (File f : getTestDir().listFiles()) {
             if (f.isDirectory()) {
                 continue;
             }
-            if (needToFix.contains(f.getName()) || unknownProblem.contains(f.getName())) {
+            if (needToFix.contains(f.getName())) {
                 continue;
             }
             System.out.println("about to parse "+f);
+            Metadata metadata = new Metadata();
             try (InputStream is = TikaInputStream.get(f)) {
                 parser.parse(is, handler, metadata, context);
             } catch (EncryptedDocumentException e) {