You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/06/20 22:44:19 UTC

svn commit: r1495174 - in /nutch/branches/2.x: CHANGES.txt src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java

Author: lewismc
Date: Thu Jun 20 20:44:19 2013
New Revision: 1495174

URL: http://svn.apache.org/r1495174
Log:
NUTCH-1585 Ensure duplicate tags do not exist in the microformats-reltag tag set.

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1495174&r1=1495173&r2=1495174&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Jun 20 20:44:19 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1585 Ensure duplicate tags do not exist in microformat-reltag tag set (lewismc)
+
 * NUTCH-1475 Index-More Plugin -- A better fall back value for date field (James Sullivan, snagel via lewismc)
 
 * NUTCH-1420 Get rid of the dreaded � (markus + lewismc)

Modified: nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java?rev=1495174&r1=1495173&r2=1495174&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java (original)
+++ nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java Thu Jun 20 20:44:19 2013
@@ -85,8 +85,10 @@ public class RelTagParser implements Par
 	      if ("tag".equalsIgnoreCase(relNode.getNodeValue())) {
 	        String tag = parseTag(hrefNode.getNodeValue());
 	        if (!StringUtil.isEmpty(tag)) {
-	          tags.add(tag);
-		  LOG.debug("Adding tag: " + tag + " to tag set.");
+	          if(!tags.contains(tag)){
+                    tags.add(tag);
+		    LOG.debug("Adding tag: " + tag + " to tag set.");
+                  }
 	        }
 	      }
 	    }