You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/04/22 12:01:18 UTC
svn commit: r1328835 - in
/incubator/any23/trunk/core/src/main/java/org/apache/any23:
extractor/ExtractionContext.java extractor/microdata/MicrodataParser.java
writer/RDFWriterTripleHandler.java
Author: mostarda
Date: Sun Apr 22 10:01:17 2012
New Revision: 1328835
URL: http://svn.apache.org/viewvc?rev=1328835&view=rev
Log:
Replaced the node nesting detection logic with higher-performance code. This commit is related to issue #ANY23-77.
Modified:
incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java Sun Apr 22 10:01:17 2012
@@ -101,7 +101,7 @@ public class ExtractionContext {
}
public String toString() {
- return String.format("ExtractionContext(%s)", uniqueID);
+ return "ExtractionContext(" + uniqueID + ")";
}
private void checkNotNull(Object data, String desc) {
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java Sun Apr 22 10:01:17 2012
@@ -17,7 +17,6 @@
package org.apache.any23.extractor.microdata;
import org.apache.any23.extractor.html.DomUtils;
-import org.apache.any23.util.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -254,12 +253,7 @@ public class MicrodataParser {
boolean skip = false;
for(int j = 0; j < candidates.size(); j++) {
if(i == j) continue;
- if(
- StringUtils.isPrefix(
- DomUtils.getXPathForNode(candidates.get(j)),
- DomUtils.getXPathForNode(candidates.get(i))
- )
- ) {
+ if( DomUtils.isAncestorOf(candidates.get(j), candidates.get(i), true) ) {
skip = true;
break;
}
@@ -360,19 +354,10 @@ public class MicrodataParser {
final List<Node> subItemScopes = getItemScopeNodes(node);
subItemScopes.remove(node);
final List<Node> accepted = new ArrayList<Node>();
- String subItemScopeXpath;
- String subItemPropXPath;
for(Node itemPropNode : itemPropNodes) {
boolean skip = false;
for(Node subItemScope : subItemScopes) {
- subItemScopeXpath = DomUtils.getXPathForNode(subItemScope);
- subItemPropXPath = DomUtils.getXPathForNode(itemPropNode);
- if(
- StringUtils.isPrefix(subItemScopeXpath, subItemPropXPath)
- &&
- // This prevent removal of itemprop that is also itemscope
- subItemScopeXpath.length() < subItemPropXPath.length()
- ) {
+ if( DomUtils.isAncestorOf(subItemScope, itemPropNode, true) ) {
skip = true;
break;
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java Sun Apr 22 10:01:17 2012
@@ -82,7 +82,7 @@ public abstract class RDFWriterTripleHan
@Override
public void openContext(ExtractionContext context) throws TripleHandlerException {
- handleComment( String.format("BEGIN: " + context) );
+ handleComment( "BEGIN: " + context );
}
@Override
@@ -114,7 +114,7 @@ public abstract class RDFWriterTripleHan
@Override
public void closeContext(ExtractionContext context) throws TripleHandlerException {
- handleComment( String.format("END: " + context) );
+ handleComment( "END: " + context );
}
@Override