You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/01/11 00:52:34 UTC

svn commit: r367837 - /lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java

Author: jerome
Date: Tue Jan 10 15:52:31 2006
New Revision: 367837

URL: http://svn.apache.org/viewcvs?rev=367837&view=rev
Log:
HTMLMetaProcessor now retrieves name, http-equiv and content attributes whatever their case

Modified:
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java

Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java?rev=367837&r1=367836&r2=367837&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java Tue Jan 10 15:52:31 2006
@@ -59,10 +59,22 @@
 
       if ("meta".equalsIgnoreCase(node.getNodeName())) {
         NamedNodeMap attrs = node.getAttributes();
-        Node nameNode = attrs.getNamedItem("name");
-        Node equivNode = attrs.getNamedItem("http-equiv");
-        Node contentNode = attrs.getNamedItem("content");
-
+        Node nameNode = null;
+        Node equivNode = null;
+        Node contentNode = null;
+        // Retrieves name, http-equiv and content attribues
+        for (int i=0; i<attrs.getLength(); i++) {
+          Node attr = attrs.item(i);
+          String attrName = attr.getNodeName().toLowerCase();
+          if (attrName.equals("name")) {
+            nameNode = attr;
+          } else if (attrName.equals("http-equiv")) {
+            equivNode = attr;
+          } else if (attrName.equals("content")) {
+            contentNode = attr;
+          }
+        }
+        
         if (nameNode != null) {
           if (contentNode != null) {
             String name = nameNode.getNodeValue().toLowerCase();