You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/01/12 00:24:53 UTC

svn commit: r368167 - in /lucene/nutch/trunk/src/java/org/apache/nutch: fetcher/Fetcher.java parse/ParseSegment.java

Author: ab
Date: Wed Jan 11 15:24:40 2006
New Revision: 368167

URL: http://svn.apache.org/viewcvs?rev=368167&view=rev
Log:
Make sure we always have the segment name and score values in
ParseData.metadata. Sometimes plugins would fail to copy them through,
or a parsing error would produce an empty ParseData.metadata.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=368167&r1=368166&r2=368167&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Wed Jan 11 15:24:40 2006
@@ -223,6 +223,9 @@
         parse.getData().getMetadata().setProperty(SIGNATURE_KEY, StringUtil.toHexString(signature));
         datum.setSignature(signature);
       }
+      // add segment name and score to parseData metadata
+      parse.getData().getMetadata().setProperty(SEGMENT_NAME_KEY, segmentName);
+      parse.getData().getMetadata().setProperty(SCORE_KEY, Float.toString(datum.getScore()));
 
       try {
         output.collect

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=368167&r1=368166&r2=368167&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Wed Jan 11 15:24:40 2006
@@ -58,9 +58,16 @@
       status = new ParseStatus(e);
     }
 
+    ContentProperties metadata = parse.getData().getMetadata();
     // compute the new signature
     byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
-    parse.getData().getMetadata().setProperty(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
+    metadata.setProperty(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
+    // copy segment name and score
+    String segmentName = content.getMetadata().getProperty(Fetcher.SEGMENT_NAME_KEY);
+    String score = content.getMetadata().getProperty(Fetcher.SCORE_KEY);
+    metadata.setProperty(Fetcher.SEGMENT_NAME_KEY, segmentName);
+    metadata.setProperty(Fetcher.SCORE_KEY, score);
+    
     if (status.isSuccess()) {
       output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
     } else {