You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/05/09 20:54:07 UTC

svn commit: r536618 - /lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java

Author: ab
Date: Wed May  9 11:54:06 2007
New Revision: 536618

URL: http://svn.apache.org/viewvc?view=rev&rev=536618
Log:
Add missing file from the last commit. Spotted by Sami.

Added:
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java   (with props)

Added: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?view=auto&rev=536618
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Wed May  9 11:54:06 2007
@@ -0,0 +1,71 @@
+package org.apache.nutch.parse;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+
+/**
+ * A utility class that stores result of a parse. Internally
+ * a ParseResult stores <{@link Text}, {@link Parse}> pairs.
+ */
+public class ParseResult implements Iterable<Map.Entry<Text, Parse>> {
+  private Map<Text, Parse> parseMap;
+  private String originalUrl;
+  
+  public static final Log LOG = LogFactory.getLog(ParseResult.class);
+  
+  public ParseResult(String originalUrl) {
+    parseMap = new HashMap<Text, Parse>();
+    this.originalUrl = originalUrl;
+  }
+  
+  public static ParseResult createParseResult(String url, Parse parse) {
+    ParseResult parseResult = new ParseResult(url);
+    parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData());
+    return parseResult;
+  }
+  
+  public boolean isEmpty() {
+    return parseMap.isEmpty();
+  }
+  
+  public int size() {
+    return parseMap.size();
+  }
+  
+  public Parse get(String key) {
+    return get(new Text(key));
+  }
+  
+  public Parse get(Text key) {
+    return parseMap.get(key);
+  }
+  
+  public void put(Text key, ParseText text, ParseData data) {
+    put(key.toString(), text, data);
+  }
+  
+  public void put(String key, ParseText text, ParseData data) {
+    parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl)));
+  }
+
+  public Iterator<Entry<Text, Parse>> iterator() {
+    return parseMap.entrySet().iterator();
+  }
+  
+  public void filter() {
+    for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
+      Entry<Text, Parse> entry = i.next();
+      if (!entry.getValue().getData().getStatus().isSuccess()) {
+        LOG.warn(entry.getKey() + " is not parsed successfully, filtering");
+        i.remove();
+      }
+    }
+      
+  }
+}

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
------------------------------------------------------------------------------
    svn:eol-style = native