You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/03/23 15:16:05 UTC
svn commit: r1304362 [1/2] - in /incubator/any23/trunk/core/src:
main/java/org/apache/any23/extractor/
main/java/org/apache/any23/extractor/html/
test/java/org/apache/any23/extractor/
test/java/org/apache/any23/extractor/html/ test/resources/microformats/hcard/
Author: mostarda
Date: Fri Mar 23 14:16:04 2012
New Revision: 1304362
URL: http://svn.apache.org/viewvc?rev=1304362&view=rev
Log:
Fixed an infinite loop that occurred while computing inclusions in HCardExtractor (ANY23-58).
Added a regression test in HCardExtractorTest.
While fixing this issue, another bug in extractor Issue reporting
was discovered and fixed (ANY23-62).
Added:
incubator/any23/trunk/core/src/test/resources/microformats/hcard/infinite-loop.html
Modified:
incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java Fri Mar 23 14:16:04 2012
@@ -81,6 +81,15 @@ public class ExtractionResultImpl implem
Extractor<?> extractor,
TripleHandler tripleHandler
) {
+ this(context, extractor, tripleHandler, new ArrayList<Issue>());
+ }
+
+ private ExtractionResultImpl(
+ ExtractionContext context,
+ Extractor<?> extractor,
+ TripleHandler tripleHandler,
+ List<Issue> issues
+ ) {
if(context == null) {
throw new NullPointerException("context cannot be null.");
}
@@ -94,24 +103,24 @@ public class ExtractionResultImpl implem
this.extractor = extractor;
this.tripleHandler = tripleHandler;
this.context = context;
+ this.issues = issues;
+
knownContextIDs.add( context.getUniqueID() );
}
public boolean hasIssues() {
- return issues != null;
+ return ! issues.isEmpty();
}
public int getIssuesCount() {
- return issues == null ? 0 : issues.size();
+ return issues.size();
}
public void printReport(PrintStream ps) {
ps.print(String.format("Context: %s [errors: %d] {\n", context, getIssuesCount()));
- if (issues != null) {
- for (Issue issue : issues) {
- ps.print(issue.toString());
- ps.print("\n");
- }
+ for (Issue issue : issues) {
+ ps.print(issue.toString());
+ ps.print("\n");
}
// Printing sub results.
for (ExtractionResult er : subResults) {
@@ -121,7 +130,7 @@ public class ExtractionResultImpl implem
}
public Collection<Issue> getIssues() {
- return issues == null ? Collections.<Issue>emptyList() : Collections.unmodifiableList(issues);
+ return issues.isEmpty() ? Collections.<Issue>emptyList() : Collections.unmodifiableList(issues);
}
public ExtractionResult openSubResult(ExtractionContext context) {
@@ -132,8 +141,7 @@ public class ExtractionResultImpl implem
knownContextIDs.add(contextID);
checkOpen();
- ExtractionResult result =
- new ExtractionResultImpl(context, extractor, tripleHandler);
+ ExtractionResult result = new ExtractionResultImpl(context, extractor, tripleHandler, this.issues);
subResults.add(result);
return result;
}
@@ -176,9 +184,6 @@ public class ExtractionResultImpl implem
}
public void notifyIssue(IssueLevel level, String msg, int row, int col) {
- if(issues == null) {
- issues = new ArrayList<Issue>();
- }
issues.add(new Issue(level, msg, row, col));
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java Fri Mar 23 14:16:04 2012
@@ -20,6 +20,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.annotations.Includes;
@@ -80,7 +81,7 @@ public class HCardExtractor extends Enti
name.reset(); // Cleanup of the HCardName content.
}
- private void fixIncludes(HTMLDocument document, Node node) {
+ private void fixIncludes(HTMLDocument document, Node node, IssueReport report) {
NamedNodeMap attributes = node.getAttributes();
// header case test 32
if ("TD".equals(node.getNodeName()) && (null != attributes.getNamedItem("headers"))) {
@@ -91,8 +92,8 @@ public class HCardExtractor extends Enti
attributes.removeNamedItem("headers");
}
}
- // include pattern, test 31
+ // include pattern, test 31
for (Node current : document.findAll("//*[@class]")) {
if (!DomUtils.hasClassName(current, "include")) continue;
// we have to remove the field soon to avoid infinite loops
@@ -103,10 +104,19 @@ public class HCardExtractor extends Enti
TextField id = res.get(0);
if (null == id)
continue;
- id = new TextField( StringUtils.substringAfter(id.value(), "#"), id.source() );
- Node included = document.findNodeById(id.value());
+ TextField refId = new TextField( StringUtils.substringAfter(id.value(), "#"), id.source() );
+ Node included = document.findNodeById(refId.value());
if (null == included)
continue;
+ if( DomUtils.isAncestorOf(included, current) ) {
+ final int[] nodeLocation = DomUtils.getNodeLocation(current);
+ report.notifyIssue(
+ IssueReport.IssueLevel.Warning,
+ "Current node tries to include an ancestor node.",
+ nodeLocation[0], nodeLocation[1]
+ );
+ continue;
+ }
current.appendChild(included.cloneNode(true));
}
}
@@ -114,7 +124,7 @@ public class HCardExtractor extends Enti
@Override
protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
this.fragment = new HTMLDocument(node);
- fixIncludes(getHTMLDocument(), node);
+ fixIncludes(getHTMLDocument(), node, out);
final BNode card = getBlankNodeFor(node);
boolean foundSomething = false;
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java Fri Mar 23 14:16:04 2012
@@ -68,14 +68,15 @@ public class ExtractionResultImplTest {
@Test
public void testNotifyErrors() throws IOException {
+ notifyErrors(extractionResult);
+ assertContent(extractionResult, 3);
+
final ExtractionResult subExtractionResult = extractionResult.openSubResult(
new ExtractionContext("sub-id", RDFUtils.uri("http://sub/uri") )
);
- notifyErrors(extractionResult);
- notifyErrors(subExtractionResult);
- assertContent(extractionResult);
- assertContent(subExtractionResult);
+ notifyErrors(subExtractionResult);
+ assertContent(subExtractionResult, 6);
}
private void notifyErrors(ExtractionResult er) {
@@ -84,12 +85,12 @@ public class ExtractionResultImplTest {
er.notifyIssue(IssueReport.IssueLevel.Fatal , "Fatal message" , 5, 6);
}
- private void assertContent(ExtractionResult er) {
- Assert.assertEquals("Unexpected errors list size." , 3, er.getIssues().size() );
+ private void assertContent(ExtractionResult er, int errorCount) {
+ Assert.assertEquals("Unexpected errors list size." , errorCount, er.getIssues().size() );
assertOutputString(er, IssueReport.IssueLevel.Error.toString());
assertOutputString(er, IssueReport.IssueLevel.Warning.toString());
assertOutputString(er, IssueReport.IssueLevel.Fatal.toString());
- assertOutputString(er, "errors: 3");
+ assertOutputString(er, "errors: " + errorCount);
}
private void assertOutputString(ExtractionResult er, String s) {
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java?rev=1304362&r1=1304361&r2=1304362&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java Fri Mar 23 14:16:04 2012
@@ -20,8 +20,8 @@ package org.apache.any23.extractor.html;
import junit.framework.Assert;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.IssueReport;
import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.vocab.SINDICE;
import org.apache.any23.vocab.VCARD;
import org.junit.Test;
import org.openrdf.model.Resource;
@@ -40,7 +40,6 @@ import java.util.List;
*/
public class HCardExtractorTest extends AbstractExtractorTestCase {
- private static final SINDICE vSINDICE = SINDICE.getInstance();
private static final VCARD vVCARD = VCARD.getInstance();
protected ExtractorFactory<?> getExtractorFactory() {
@@ -954,6 +953,19 @@ public class HCardExtractorTest extends
assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
}
+ /**
+ * Tests the detection and prevention of the inclusion of an ancestor by a sibling node.
+ * This test is related to issue <a href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
+ *
+ * @throws IOException
+ * @throws ExtractionException
+ */
+ @Test
+ public void testInfiniteLoop() throws IOException, ExtractionException {
+ assertExtract("microformats/hcard/infinite-loop.html", false);
+ assertIssue(IssueReport.IssueLevel.Warning, ".*Current node tries to include an ancestor node.*");
+ }
+
private void assertDefaultVCard() throws RepositoryException {
assertModelNotEmpty();
assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);