You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/11 20:42:05 UTC

[tika] branch main updated: remove dodgy testStandardsExtractor.pdf

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new c3b489ab5 remove dodgy testStandardsExtractor.pdf
     new 8876fdf62 Merge remote-tracking branch 'origin/main' into main
c3b489ab5 is described below

commit c3b489ab5ee16dd89acf35836029b4fb0049e1c1
Author: tallison <ta...@apache.org>
AuthorDate: Wed May 11 16:41:41 2022 -0400

    remove dodgy testStandardsExtractor.pdf
---
 .../test-documents/testStandardsExtractor.pdf          | Bin 143659 -> 0 bytes
 .../sax/StandardsExtractingContentHandlerTest.java     |   5 ++---
 .../test-documents/testStandardsExtractor.txt          |  13 +++++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testStandardsExtractor.pdf b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testStandardsExtractor.pdf
deleted file mode 100644
index aad6d79d4..000000000
Binary files a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testStandardsExtractor.pdf and /dev/null differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/sax/StandardsExtractingContentHandlerTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/sax/StandardsExtractingContentHandlerTest.java
index da71ebdd0..abc703c70 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/sax/StandardsExtractingContentHandlerTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/sax/StandardsExtractingContentHandlerTest.java
@@ -40,11 +40,10 @@ public class StandardsExtractingContentHandlerTest extends TikaTest {
 
         StandardsExtractingContentHandler handler =
                 new StandardsExtractingContentHandler(new BodyContentHandler(-1), metadata);
-        handler.setThreshold(0.75);
-        InputStream inputStream = getResourceAsStream("/test-documents/testStandardsExtractor.pdf");
+        handler.setThreshold(0.25);
+        InputStream inputStream = getResourceAsStream("/test-documents/testStandardsExtractor.txt");
 
         AUTO_DETECT_PARSER.parse(inputStream, handler, metadata, new ParseContext());
-
         String[] standardReferences =
                 metadata.getValues(StandardsExtractingContentHandler.STANDARD_REFERENCES);
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testStandardsExtractor.txt b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testStandardsExtractor.txt
new file mode 100644
index 000000000..bc214ae8e
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testStandardsExtractor.txt
@@ -0,0 +1,13 @@
+REFERENCE STANDARD REQUIREMENT GUIDELINE COMPLIANCE
+
+American National Standards Institute (ANSI) / Telecommunications Industry
+Association (TIA) Publication 222-G Standards (Structural Standards for Steel Antenna
+Towers and Antenna Supporting Structures), including addendum (TIA/ANSI
+222-G-1 standards ) (http://www.tiaonline.org/standards/)
+
+Federal Information Processing Standard (FIPS) Publication 140-2, "Security
+Requirements for Cryptographic Modules"
+(http://csrc.nist.gov/publications/fips/fips140-2/fips1402.pdf)
+15. Federal Information Processing Standard (FIPS) Publication 197, "Advanced
+Encryption Standard"
+(http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf)
\ No newline at end of file