You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2016/01/21 20:34:56 UTC
tika git commit: TIKA-1835: LinkContentHandler skips iframe and rel tags
Repository: tika
Updated Branches:
refs/heads/master 489ab93c7 -> fe841bc87
TIKA-1835: LinkContentHandler skips iframe and rel tags
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/fe841bc8
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/fe841bc8
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/fe841bc8
Branch: refs/heads/master
Commit: fe841bc8760027eae3f9d0105238780815451346
Parents: 489ab93
Author: Ken Krugler <ke...@transpac.com>
Authored: Thu Jan 21 11:26:33 2016 -0800
Committer: Ken Krugler <ke...@transpac.com>
Committed: Thu Jan 21 11:26:33 2016 -0800
----------------------------------------------------------------------
.../src/main/java/org/apache/tika/sax/Link.java | 8 +++++
.../java/org/apache/tika/sax/LinkBuilder.java | 3 +-
.../org/apache/tika/sax/LinkContentHandler.java | 11 +++++-
.../apache/tika/sax/LinkContentHandlerTest.java | 35 +++++++++++++++++++-
4 files changed, 53 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/Link.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/Link.java b/tika-core/src/main/java/org/apache/tika/sax/Link.java
index 00cf223..c076f42 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/Link.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/Link.java
@@ -51,6 +51,14 @@ public class Link {
public boolean isImage() {
return "img".equals(type);
}
+
+ public boolean isLink() {
+ return "link".equals(type);
+ }
+
+ public boolean isIframe() {
+ return "iframe".equals(type);
+ }
public String getType() {
return type;
http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java b/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
index 5e6c540..9c87343 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/LinkBuilder.java
@@ -73,5 +73,4 @@ class LinkBuilder {
return new Link(type, uri, title, anchor, rel);
}
-
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
index 2ad3770..7eecc4c 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
@@ -85,6 +85,15 @@ public class LinkContentHandler extends DefaultHandler {
builder.setTitle(attributes.getValue("", "title"));
builder.setRel(attributes.getValue("", "rel"));
builderStack.addFirst(builder);
+ } else if ("link".equals(local)) {
+ LinkBuilder builder = new LinkBuilder("link");
+ builder.setURI(attributes.getValue("", "href"));
+ builder.setRel(attributes.getValue("", "rel"));
+ builderStack.addFirst(builder);
+ } else if ("iframe".equals(local)) {
+ LinkBuilder builder = new LinkBuilder("iframe");
+ builder.setURI(attributes.getValue("", "src"));
+ builderStack.addFirst(builder);
} else if ("img".equals(local)) {
LinkBuilder builder = new LinkBuilder("img");
builder.setURI(attributes.getValue("", "src"));
@@ -116,7 +125,7 @@ public class LinkContentHandler extends DefaultHandler {
@Override
public void endElement(String uri, String local, String name) {
if (XHTML.equals(uri)) {
- if ("a".equals(local) || "img".equals(local)) {
+ if ("a".equals(local) || "img".equals(local) || "link".equals(local) || "iframe".equals(local)) {
links.add(builderStack.removeFirst().getLink(collapseWhitespaceInAnchor));
}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/fe841bc8/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java b/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
index 74859a8..9f81c87 100644
--- a/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
+++ b/tika-core/src/test/java/org/apache/tika/sax/LinkContentHandlerTest.java
@@ -55,5 +55,38 @@ public class LinkContentHandlerTest {
assertEquals(" anchor ", linkContentHandler.getLinks().get(0).getText());
}
+
+ /**
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-1835">TIKA-1835</a>
+ */
+ @Test
+ public void testLinkTag() throws Exception {
+ LinkContentHandler linkContentHandler = new LinkContentHandler();
+
+ AttributesImpl atts = new AttributesImpl();
+ atts.addAttribute("", "href", "href", "", "http://tika.apache.org/stylesheet.css");
+ atts.addAttribute("", "rel", "rel", "", "stylesheet");
+
+ linkContentHandler.startElement(XHTMLContentHandler.XHTML, "link", "", atts);
+ linkContentHandler.endElement(XHTMLContentHandler.XHTML, "link", "");
-}
+ assertEquals("http://tika.apache.org/stylesheet.css", linkContentHandler.getLinks().get(0).getUri());
+ assertEquals("stylesheet", linkContentHandler.getLinks().get(0).getRel());
+ }
+
+ /**
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-1835">TIKA-1835</a>
+ */
+ @Test
+ public void testIframeTag() throws Exception {
+ LinkContentHandler linkContentHandler = new LinkContentHandler();
+
+ AttributesImpl atts = new AttributesImpl();
+ atts.addAttribute("", "src", "src", "", "http://tika.apache.org/iframe.html");
+
+ linkContentHandler.startElement(XHTMLContentHandler.XHTML, "iframe", "", atts);
+ linkContentHandler.endElement(XHTMLContentHandler.XHTML, "iframe", "");
+
+ assertEquals("http://tika.apache.org/iframe.html", linkContentHandler.getLinks().get(0).getUri());
+ }
+}
\ No newline at end of file