You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2012/08/28 00:24:55 UTC

svn commit: r1377890 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/html/HtmlHandler.java test/java/org/apache/tika/parser/html/HtmlParserTest.java

Author: kkrugler
Date: Mon Aug 27 22:24:54 2012
New Revision: 1377890

URL: http://svn.apache.org/viewvc?rev=1377890&view=rev
Log:
TIKA-983: HTML parser should add Open Graph meta tag data to Metadata returned by parser

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java?rev=1377890&r1=1377889&r2=1377890&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java Mon Aug 27 22:24:54 2012
@@ -110,6 +110,11 @@ class HtmlHandler extends TextContentHan
                     addHtmlMetadata(
                             atts.getValue("name"),
                             atts.getValue("content"));
+                } else if (atts.getValue("property") != null) {
+                    // TIKA-983: Handle <meta property="og:xxx" content="yyy" /> tags
+                    addHtmlMetadata(
+                            atts.getValue("property"),
+                            atts.getValue("content"));
                 }
             } else if ("BASE".equals(name) && atts.getValue("href") != null) {
                 startElementWithSafeAttributes("base", atts);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1377890&r1=1377889&r2=1377890&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Mon Aug 27 22:24:54 2012
@@ -783,5 +783,22 @@ public class HtmlParserTest extends Test
         assertTrue(Pattern.matches("\tone\n\n", result));
     }
 
-    
+    /**
+     * Test case for TIKA-983: the HTML parser should add Open Graph meta tag data to the Metadata it returns.
+     * A {@code <meta property="og:description">} tag's content must be readable via {@code metadata.get("og:description")}.
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-983">TIKA-983</a>
+     */
+    public void testOpenGraphMetadata() throws Exception {
+        String test1 =
+            "<html><head><meta property=\"og:description\""
+            + " content=\"some description\" />"
+            + "<title>hello</title>"
+            + "</head><body></body></html>";
+        Metadata metadata = new Metadata();
+        new HtmlParser().parse (
+                new ByteArrayInputStream(test1.getBytes("ISO-8859-1")),
+                new BodyContentHandler(),  metadata, new ParseContext());
+        assertEquals("some description", metadata.get("og:description"));
+
+    }
 }