You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/05/09 21:15:46 UTC
svn commit: r536623 -
/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
Author: ab
Date: Wed May 9 12:15:45 2007
New Revision: 536623
URL: http://svn.apache.org/viewvc?view=rev&rev=536623
Log:
Add missing javadoc and license header.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?view=diff&rev=536623&r1=536622&r2=536623
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Wed May 9 12:15:45 2007
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.nutch.parse;
import java.util.HashMap;
@@ -12,6 +29,11 @@
/**
* A utility class that stores result of a parse. Internally
* a ParseResult stores <{@link Text}, {@link Parse}> pairs.
+ * <p>Parsers may return multiple results, which correspond to parts
+ * or other associated documents related to the original URL.</p>
+ * <p>There will usually be one parse result that corresponds directly
+ * to the original URL, and possibly many (or none) results that correspond
+ * to derived URLs (or sub-URLs).</p>
*/
public class ParseResult implements Iterable<Map.Entry<Text, Parse>> {
private Map<Text, Parse> parseMap;
@@ -19,45 +41,94 @@
public static final Log LOG = LogFactory.getLog(ParseResult.class);
+ /**
+ * Create a container for parse results.
+ * @param originalUrl the original url from which all parse results
+ * have been obtained.
+ */
public ParseResult(String originalUrl) {
parseMap = new HashMap<Text, Parse>();
this.originalUrl = originalUrl;
}
+ /**
+ * Convenience method for obtaining {@link ParseResult} from a single
+ * {@link Parse} output.
+ * @param url canonical url
+ * @param parse single parse output
+ * @return result containing the single parse output
+ */
public static ParseResult createParseResult(String url, Parse parse) {
ParseResult parseResult = new ParseResult(url);
parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData());
return parseResult;
}
+ /**
+ * Checks whether the result is empty.
+ * @return {@code true} if no parse outputs are stored, {@code false} otherwise
+ */
public boolean isEmpty() {
return parseMap.isEmpty();
}
+ /**
+ * Return the number of parse outputs (both successful and failed)
+ */
public int size() {
return parseMap.size();
}
+ /**
+ * Retrieve a single parse output.
+ * @param key sub-url under which the parse output is stored.
+ * @return parse output corresponding to this sub-url, or null.
+ */
public Parse get(String key) {
return get(new Text(key));
}
+ /**
+ * Retrieve a single parse output.
+ * @param key sub-url under which the parse output is stored.
+ * @return parse output corresponding to this sub-url, or null.
+ */
public Parse get(Text key) {
return parseMap.get(key);
}
+ /**
+ * Store a result of parsing.
+ * @param key URL or sub-url of this parse result
+ * @param text plain text result
+ * @param data corresponding parse metadata of this result
+ */
public void put(Text key, ParseText text, ParseData data) {
put(key.toString(), text, data);
}
+ /**
+ * Store a result of parsing.
+ * @param key URL or sub-url of this parse result
+ * @param text plain text result
+ * @param data corresponding parse metadata of this result
+ */
public void put(String key, ParseText text, ParseData data) {
parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl)));
}
+ /**
+ * Iterate over all entries in the {@code <url, Parse>} map.
+ */
public Iterator<Entry<Text, Parse>> iterator() {
return parseMap.entrySet().iterator();
}
+ /**
+ * Remove all results where status is not successful (as determined
+ * by {@link ParseStatus#isSuccess()}). Note that effects of this operation
+ * cannot be reversed.
+ */
public void filter() {
for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
Entry<Text, Parse> entry = i.next();