You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2016/01/21 20:34:56 UTC

tika git commit: TIKA-1835: LinkContentHandler skips iframe and rel tags

Repository: tika
Updated Branches:
  refs/heads/master 489ab93c7 -> fe841bc87


TIKA-1835: LinkContentHandler skips iframe and rel tags


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/fe841bc8
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/fe841bc8
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/fe841bc8

Branch: refs/heads/master
Commit: fe841bc8760027eae3f9d0105238780815451346
Parents: 489ab93
Author: Ken Krugler <ke...@transpac.com>
Authored: Thu Jan 21 11:26:33 2016 -0800
Committer: Ken Krugler <ke...@transpac.com>
Committed: Thu Jan 21 11:26:33 2016 -0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/sax/Link.java |  8 +++++
 .../java/org/apache/tika/sax/LinkBuilder.java   |  3 +-
 .../org/apache/tika/sax/LinkContentHandler.java | 11 +++++-
 .../apache/tika/sax/LinkContentHandlerTest.java | 35 +++++++++++++++++++-
 4 files changed, 53 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/Link.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/Link.java b/tika-core/src/main/java/org/apache/tika/sax/Link.java
index 00cf223..c076f42 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/Link.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/Link.java
@@ -51,6 +51,14 @@ public class Link {
     public boolean isImage() {
         return "img".equals(type);
     }
+    
+    public boolean isLink() {
+        return "link".equals(type);
+    }
+
+    public boolean isIframe() {
+        return "iframe".equals(type);
+    }
 
     public String getType() {
         return type;

http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java b/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
index 5e6c540..9c87343 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
@@ -73,5 +73,4 @@ class LinkBuilder {
         
         return new Link(type, uri, title, anchor, rel);
     }
-
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
index 2ad3770..7eecc4c 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
@@ -85,6 +85,15 @@ public class LinkContentHandler extends DefaultHandler {
                 builder.setTitle(attributes.getValue("", "title"));
                 builder.setRel(attributes.getValue("", "rel"));
                 builderStack.addFirst(builder);
+            } else if ("link".equals(local)) {
+                LinkBuilder builder = new LinkBuilder("link");
+                builder.setURI(attributes.getValue("", "href"));
+                builder.setRel(attributes.getValue("", "rel"));
+                builderStack.addFirst(builder);
+            } else if ("iframe".equals(local)) {
+                LinkBuilder builder = new LinkBuilder("iframe");
+                builder.setURI(attributes.getValue("", "src"));
+                builderStack.addFirst(builder);
             } else if ("img".equals(local)) {
                 LinkBuilder builder = new LinkBuilder("img");
                 builder.setURI(attributes.getValue("", "src"));
@@ -116,7 +125,7 @@ public class LinkContentHandler extends DefaultHandler {
     @Override
     public void endElement(String uri, String local, String name) {
         if (XHTML.equals(uri)) {
-            if ("a".equals(local) || "img".equals(local)) {
+            if ("a".equals(local) || "img".equals(local) || "link".equals(local) || "iframe".equals(local)) {
                 links.add(builderStack.removeFirst().getLink(collapseWhitespaceInAnchor));
             }
         }

http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java b/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
index 74859a8..9f81c87 100644
--- a/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
+++ b/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
@@ -55,5 +55,38 @@ public class LinkContentHandlerTest {
 
         assertEquals(" anchor ", linkContentHandler.getLinks().get(0).getText());
     }
+    
+    /**
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-1835">TIKA-1835</a>
+     */
+    @Test
+    public void testLinkTag() throws Exception {
+        LinkContentHandler linkContentHandler = new LinkContentHandler();
+        
+        AttributesImpl atts = new AttributesImpl();
+        atts.addAttribute("", "href", "href", "", "http://tika.apache.org/stylesheet.css");
+        atts.addAttribute("", "rel", "rel", "", "stylesheet");
+        
+        linkContentHandler.startElement(XHTMLContentHandler.XHTML, "link", "", atts);
+        linkContentHandler.endElement(XHTMLContentHandler.XHTML, "link", "");
 
-}
+        assertEquals("http://tika.apache.org/stylesheet.css", linkContentHandler.getLinks().get(0).getUri());
+        assertEquals("stylesheet", linkContentHandler.getLinks().get(0).getRel());
+    }
+    
+    /**
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-1835">TIKA-1835</a>
+     */
+    @Test
+    public void testIframeTag() throws Exception {
+        LinkContentHandler linkContentHandler = new LinkContentHandler();
+        
+        AttributesImpl atts = new AttributesImpl();
+        atts.addAttribute("", "src", "src", "", "http://tika.apache.org/iframe.html");
+        
+        linkContentHandler.startElement(XHTMLContentHandler.XHTML, "iframe", "", atts);
+        linkContentHandler.endElement(XHTMLContentHandler.XHTML, "iframe", "");
+
+        assertEquals("http://tika.apache.org/iframe.html", linkContentHandler.getLinks().get(0).getUri());
+    }
+}
\ No newline at end of file