You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/07/27 14:16:45 UTC

[tika] 11/30: Handle .epub files whose embedded contents use the .htm extension, in addition to .html and .xhtml (TIKA-1288)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b6399c65a70b768c41febbc228c1cdcdd8ed04b4
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Wed May 9 10:23:09 2018 +0100

    Handle .epub files whose embedded contents use the .htm extension, in addition to .html and .xhtml (TIKA-1288)
    
    # Conflicts:
    #	CHANGES.txt
---
 tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index c4f72de..775b319 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -105,7 +105,8 @@ public class EpubParser extends AbstractParser {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
             } else if (entry.getName().endsWith(".opf")) {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
-            } else if (entry.getName().endsWith(".html") || 
+            } else if (entry.getName().endsWith(".htm") || 
+                           entry.getName().endsWith(".html") || 
             		   entry.getName().endsWith(".xhtml")) {
                 content.parse(zip, childHandler, metadata, context);
             }