You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/04/22 12:01:18 UTC

svn commit: r1328835 - in /incubator/any23/trunk/core/src/main/java/org/apache/any23: extractor/ExtractionContext.java extractor/microdata/MicrodataParser.java writer/RDFWriterTripleHandler.java

Author: mostarda
Date: Sun Apr 22 10:01:17 2012
New Revision: 1328835

URL: http://svn.apache.org/viewvc?rev=1328835&view=rev
Log:
Replaced the node nesting detection logic with higher-performance code. This commit is related to issue #ANY23-77.

Modified:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java Sun Apr 22 10:01:17 2012
@@ -101,7 +101,7 @@ public class ExtractionContext {
     }
 
     public String toString() {
-        return String.format("ExtractionContext(%s)", uniqueID);
+        return "ExtractionContext(" + uniqueID + ")";
     }
 
     private void checkNotNull(Object data, String desc) {

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java Sun Apr 22 10:01:17 2012
@@ -17,7 +17,6 @@
 package org.apache.any23.extractor.microdata;
 
 import org.apache.any23.extractor.html.DomUtils;
-import org.apache.any23.util.StringUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -254,12 +253,7 @@ public class MicrodataParser {
             boolean skip = false;
             for(int j = 0; j < candidates.size(); j++) {
                 if(i == j) continue;
-                if(
-                        StringUtils.isPrefix(
-                                DomUtils.getXPathForNode(candidates.get(j)),
-                                DomUtils.getXPathForNode(candidates.get(i))
-                        )
-                ) {
+                if( DomUtils.isAncestorOf(candidates.get(j), candidates.get(i), true) ) {
                     skip = true;
                     break;
                 }
@@ -360,19 +354,10 @@ public class MicrodataParser {
         final List<Node> subItemScopes = getItemScopeNodes(node);
         subItemScopes.remove(node);
         final List<Node> accepted = new ArrayList<Node>();
-        String subItemScopeXpath;
-        String subItemPropXPath;
         for(Node itemPropNode : itemPropNodes) {
             boolean skip = false;
             for(Node subItemScope : subItemScopes) {
-                subItemScopeXpath = DomUtils.getXPathForNode(subItemScope);
-                subItemPropXPath  = DomUtils.getXPathForNode(itemPropNode);
-                if(
-                    StringUtils.isPrefix(subItemScopeXpath, subItemPropXPath)
-                            &&
-                    // This prevent removal of itemprop that is also itemscope
-                    subItemScopeXpath.length() < subItemPropXPath.length()
-                ) {
+                if( DomUtils.isAncestorOf(subItemScope, itemPropNode, true) ) {
                     skip = true;
                     break;
                 }

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java?rev=1328835&r1=1328834&r2=1328835&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java Sun Apr 22 10:01:17 2012
@@ -82,7 +82,7 @@ public abstract class RDFWriterTripleHan
 
     @Override
     public void openContext(ExtractionContext context) throws TripleHandlerException {
-        handleComment( String.format("BEGIN: " + context) );
+        handleComment( "BEGIN: " + context );
     }
 
     @Override
@@ -114,7 +114,7 @@ public abstract class RDFWriterTripleHan
 
     @Override
     public void closeContext(ExtractionContext context) throws TripleHandlerException {
-        handleComment( String.format("END: " + context) );
+        handleComment( "END: " + context );
     }
 
     @Override