You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/26 13:31:35 UTC
(tika) branch main updated: TIKA-4219 -- clean up...do not include font names in main package
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e88be05ad TIKA-4219 -- clean up...do not include font names in main package
e88be05ad is described below
commit e88be05ad588a59916f199643f51673d693b0642
Author: tallison <ta...@apache.org>
AuthorDate: Tue Mar 26 09:10:01 2024 -0400
TIKA-4219 -- clean up...do not include font names in main package
---
.../src/main/java/org/apache/tika/parser/epub/EpubParser.java | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index 7c4168b0c..56ff532d9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -451,9 +451,13 @@ public class EpubParser implements Parser {
xhtml.startElement("div", "class", "embedded");
try {
+ boolean outputHtml = true;
+ if (hRefMediaPair.media.contains("font") || hRefMediaPair.href.startsWith("fonts")) {
+ outputHtml = false;
+ }
embeddedDocumentExtractor
.parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
- true);
+ outputHtml);
} finally {
IOUtils.closeQuietly(stream);