You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/03/23 15:16:05 UTC

svn commit: r1304362 [1/2] - in /incubator/any23/trunk/core/src: main/java/org/apache/any23/extractor/ main/java/org/apache/any23/extractor/html/ test/java/org/apache/any23/extractor/ test/java/org/apache/any23/extractor/html/ test/resources/microforma...

Author: mostarda
Date: Fri Mar 23 14:16:04 2012
New Revision: 1304362

URL: http://svn.apache.org/viewvc?rev=1304362&view=rev
Log:
Fixed an infinite loop while computing inclusions in HCardExtractor (ANY23-58).
Added a regression test in HCardExtractorTest.
While fixing this issue, another bug in the extractor's issue reporting
was discovered and fixed (ANY23-62).

Added:
    incubator/any23/trunk/core/src/test/resources/microformats/hcard/infinite-loop.html
Modified:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
    incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
    incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java Fri Mar 23 14:16:04 2012
@@ -81,6 +81,15 @@ public class ExtractionResultImpl implem
             Extractor<?> extractor,
             TripleHandler tripleHandler
     ) {
+        this(context, extractor, tripleHandler, new ArrayList<Issue>());
+    }
+
+    private ExtractionResultImpl(
+            ExtractionContext context,
+            Extractor<?> extractor,
+            TripleHandler tripleHandler,
+            List<Issue> issues
+    ) {
         if(context == null) {
             throw new NullPointerException("context cannot be null.");
         }
@@ -94,24 +103,24 @@ public class ExtractionResultImpl implem
         this.extractor       = extractor;
         this.tripleHandler   = tripleHandler;
         this.context         = context;
+        this.issues          = issues;
+
         knownContextIDs.add( context.getUniqueID() );
     }
 
     public boolean hasIssues() {
-        return issues != null;
+        return ! issues.isEmpty();
     }
 
     public int getIssuesCount() {
-        return issues == null ? 0 : issues.size();
+        return issues.size();
     }
 
     public void printReport(PrintStream ps) {
         ps.print(String.format("Context: %s [errors: %d] {\n", context, getIssuesCount()));
-        if (issues != null) {
-            for (Issue issue : issues) {
-                ps.print(issue.toString());
-                ps.print("\n");
-            }
+        for (Issue issue : issues) {
+            ps.print(issue.toString());
+            ps.print("\n");
         }
         // Printing sub results.
         for (ExtractionResult er : subResults) {
@@ -121,7 +130,7 @@ public class ExtractionResultImpl implem
     }
 
     public Collection<Issue> getIssues() {
-        return issues == null ? Collections.<Issue>emptyList() : Collections.unmodifiableList(issues);
+        return issues.isEmpty() ? Collections.<Issue>emptyList() : Collections.unmodifiableList(issues);
     }
 
     public ExtractionResult openSubResult(ExtractionContext context) {
@@ -132,8 +141,7 @@ public class ExtractionResultImpl implem
         knownContextIDs.add(contextID);
 
         checkOpen();
-        ExtractionResult result =
-                new ExtractionResultImpl(context, extractor, tripleHandler);
+        ExtractionResult result = new ExtractionResultImpl(context, extractor, tripleHandler, this.issues);
         subResults.add(result);
         return result;
     }
@@ -176,9 +184,6 @@ public class ExtractionResultImpl implem
     }
 
     public void notifyIssue(IssueLevel level, String msg, int row, int col) {
-        if(issues == null) {
-            issues = new ArrayList<Issue>();
-        }
         issues.add(new Issue(level, msg, row, col));
     }
 

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java Fri Mar 23 14:16:04 2012
@@ -20,6 +20,7 @@ package org.apache.any23.extractor.html;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.IssueReport;
 import org.apache.any23.extractor.SimpleExtractorFactory;
 import org.apache.any23.extractor.TagSoupExtractionResult;
 import org.apache.any23.extractor.html.annotations.Includes;
@@ -80,7 +81,7 @@ public class HCardExtractor extends Enti
         name.reset(); // Cleanup of the HCardName content.
     }
 
-    private void fixIncludes(HTMLDocument document, Node node) {
+    private void fixIncludes(HTMLDocument document, Node node, IssueReport report) {
         NamedNodeMap attributes = node.getAttributes();
         // header case test 32
         if ("TD".equals(node.getNodeName()) && (null != attributes.getNamedItem("headers"))) {
@@ -91,8 +92,8 @@ public class HCardExtractor extends Enti
                 attributes.removeNamedItem("headers");
             }
         }
-        // include pattern, test 31
 
+        // include pattern, test 31
         for (Node current : document.findAll("//*[@class]")) {
             if (!DomUtils.hasClassName(current, "include")) continue;
             // we have to remove the field soon to avoid infinite loops
@@ -103,10 +104,19 @@ public class HCardExtractor extends Enti
             TextField id = res.get(0);
             if (null == id)
                 continue;
-            id = new TextField( StringUtils.substringAfter(id.value(), "#"), id.source() );
-            Node included = document.findNodeById(id.value());
+            TextField refId = new TextField( StringUtils.substringAfter(id.value(), "#"), id.source() );
+            Node included = document.findNodeById(refId.value());
             if (null == included)
                 continue;
+            if( DomUtils.isAncestorOf(included, current) )  {
+                final int[] nodeLocation = DomUtils.getNodeLocation(current);
+                report.notifyIssue(
+                        IssueReport.IssueLevel.Warning,
+                        "Current node tries to include an ancestor node.",
+                        nodeLocation[0], nodeLocation[1]
+                );
+                continue;
+            }
             current.appendChild(included.cloneNode(true));
         }
     }
@@ -114,7 +124,7 @@ public class HCardExtractor extends Enti
     @Override
     protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
         this.fragment = new HTMLDocument(node);
-        fixIncludes(getHTMLDocument(), node);
+        fixIncludes(getHTMLDocument(), node, out);
         final BNode card = getBlankNodeFor(node);
         boolean foundSomething = false;
 

Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java Fri Mar 23 14:16:04 2012
@@ -68,14 +68,15 @@ public class ExtractionResultImplTest {
 
     @Test
     public void testNotifyErrors() throws IOException {
+        notifyErrors(extractionResult);
+        assertContent(extractionResult, 3);
+
         final ExtractionResult subExtractionResult = extractionResult.openSubResult(
                 new ExtractionContext("sub-id", RDFUtils.uri("http://sub/uri") )
         );
-        notifyErrors(extractionResult);
-        notifyErrors(subExtractionResult);
 
-        assertContent(extractionResult);
-        assertContent(subExtractionResult);
+        notifyErrors(subExtractionResult);
+        assertContent(subExtractionResult, 6);
     }
 
     private void notifyErrors(ExtractionResult er) {
@@ -84,12 +85,12 @@ public class ExtractionResultImplTest {
         er.notifyIssue(IssueReport.IssueLevel.Fatal  , "Fatal message"  , 5, 6);
     }
 
-    private void assertContent(ExtractionResult er) {
-        Assert.assertEquals("Unexpected errors list size." , 3, er.getIssues().size() );
+    private void assertContent(ExtractionResult er, int errorCount) {
+        Assert.assertEquals("Unexpected errors list size." , errorCount, er.getIssues().size() );
         assertOutputString(er, IssueReport.IssueLevel.Error.toString());
         assertOutputString(er, IssueReport.IssueLevel.Warning.toString());
         assertOutputString(er, IssueReport.IssueLevel.Fatal.toString());
-        assertOutputString(er, "errors: 3");
+        assertOutputString(er, "errors: " + errorCount);
     }
 
     private void assertOutputString(ExtractionResult er, String s) {

Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java Fri Mar 23 14:16:04 2012
@@ -20,8 +20,8 @@ package org.apache.any23.extractor.html;
 import junit.framework.Assert;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.IssueReport;
 import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.vocab.SINDICE;
 import org.apache.any23.vocab.VCARD;
 import org.junit.Test;
 import org.openrdf.model.Resource;
@@ -40,7 +40,6 @@ import java.util.List;
  */
 public class HCardExtractorTest extends AbstractExtractorTestCase {
 
-    private static final SINDICE vSINDICE = SINDICE.getInstance();
     private static final VCARD   vVCARD   = VCARD.getInstance();
 
     protected ExtractorFactory<?> getExtractorFactory() {
@@ -954,6 +953,19 @@ public class HCardExtractorTest extends 
         assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
     }
 
+    /**
+     * Tests the detection and prevention of the inclusion of an ancestor by a sibling node.
+         * This test is related to issue <a href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
+     *
+     * @throws IOException
+     * @throws ExtractionException
+     */
+    @Test
+    public void testInfiniteLoop() throws IOException, ExtractionException {
+        assertExtract("microformats/hcard/infinite-loop.html", false);
+        assertIssue(IssueReport.IssueLevel.Warning, ".*Current node tries to include an ancestor node.*");
+    }
+
     private void assertDefaultVCard() throws RepositoryException {
         assertModelNotEmpty();
         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);