You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/01/11 00:52:34 UTC
svn commit: r367837 -
/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
Author: jerome
Date: Tue Jan 10 15:52:31 2006
New Revision: 367837
URL: http://svn.apache.org/viewcvs?rev=367837&view=rev
Log:
HTMLMetaProcessor now retrieves the name, http-equiv and content attributes regardless of their case
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java?rev=367837&r1=367836&r2=367837&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java Tue Jan 10 15:52:31 2006
@@ -59,10 +59,22 @@
if ("meta".equalsIgnoreCase(node.getNodeName())) {
NamedNodeMap attrs = node.getAttributes();
- Node nameNode = attrs.getNamedItem("name");
- Node equivNode = attrs.getNamedItem("http-equiv");
- Node contentNode = attrs.getNamedItem("content");
-
+ Node nameNode = null;
+ Node equivNode = null;
+ Node contentNode = null;
+ // Retrieves name, http-equiv and content attributes
+ for (int i=0; i<attrs.getLength(); i++) {
+ Node attr = attrs.item(i);
+ String attrName = attr.getNodeName().toLowerCase();
+ if (attrName.equals("name")) {
+ nameNode = attr;
+ } else if (attrName.equals("http-equiv")) {
+ equivNode = attr;
+ } else if (attrName.equals("content")) {
+ contentNode = attr;
+ }
+ }
+
if (nameNode != null) {
if (contentNode != null) {
String name = nameNode.getNodeValue().toLowerCase();