You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:45 UTC
[tika] 11/30: Handle .epub files using .htm rather than .html
extensions for the embedded contents (TIKA-1288)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit b6399c65a70b768c41febbc228c1cdcdd8ed04b4
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Wed May 9 10:23:09 2018 +0100
Handle .epub files whose embedded contents use .htm rather than .html extensions (TIKA-1288)
# Conflicts:
# CHANGES.txt
---
tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index c4f72de..775b319 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -105,7 +105,8 @@ public class EpubParser extends AbstractParser {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
             } else if (entry.getName().endsWith(".opf")) {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
-            } else if (entry.getName().endsWith(".html") ||
+            } else if (entry.getName().endsWith(".htm") ||
+                    entry.getName().endsWith(".html") ||
                     entry.getName().endsWith(".xhtml")) {
                 content.parse(zip, childHandler, metadata, context);
             }