You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/06/02 21:03:24 UTC
svn commit: r179625 -
/incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
Author: cutting
Date: Thu Jun 2 12:03:23 2005
New Revision: 179625
URL: http://svn.apache.org/viewcvs?rev=179625&view=rev
Log:
Permit specification of dedup field, no longer hard-wired to "site".
Patch by Michael Stack.
Modified:
incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
Modified: incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java?rev=179625&r1=179624&r2=179625&view=diff
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java (original)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Thu Jun 2 12:03:23 2005
@@ -93,23 +93,42 @@
if (hitsString != null)
hitsPerPage = Integer.parseInt(hitsString);
- int hitsPerSite = 2; // max hits per site
- String hitsPerSiteString = request.getParameter("hitsPerSite");
- if (hitsPerSiteString != null)
- hitsPerSite = Integer.parseInt(hitsPerSiteString);
-
String sort = request.getParameter("sort");
boolean reverse =
sort!=null && "true".equals(request.getParameter("reverse"));
+ // De-duplication handling. Look for the field to de-duplicate on and for
+ // the maximum number of hits to return per distinct value of that field.
+ // The default field is 'site' and the default maximum is '2'.
+ String dedupField = request.getParameter("dedupField");
+ if (dedupField == null || dedupField.length() == 0) {
+ dedupField = "site";
+ }
+ int hitsPerDup = 2;
+ String hitsPerDupString = request.getParameter("hitsPerDup");
+ if (hitsPerDupString != null && hitsPerDupString.length() > 0) {
+ hitsPerDup = Integer.parseInt(hitsPerDupString);
+ } else {
+ // If 'hitsPerSite' present, use that value.
+ String hitsPerSiteString = request.getParameter("hitsPerSite");
+ if (hitsPerSiteString != null && hitsPerSiteString.length() > 0) {
+ hitsPerDup = Integer.parseInt(hitsPerSiteString);
+ }
+ }
+
+ // Make up query string for use later drawing the 'rss' logo.
+ String params = "&hitsPerPage=" + hitsPerPage +
+ (sort == null ? "" : "&sort=" + sort + (reverse? "&reverse=true": "") +
+ (dedupField == null ? "" : "&dedupField=" + dedupField));
+
Query query = Query.parse(queryString);
bean.LOG.info("query: " + queryString);
// execute the query
Hits hits;
try {
- hits = bean.search(query, start + hitsPerPage, hitsPerSite, "site",
- sort, reverse);
+ hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField,
+ sort, reverse);
} catch (IOException e) {
bean.LOG.log(Level.WARNING, "Search Error", e);
hits = new Hits(0,new Hit[0]);
@@ -127,8 +146,6 @@
String requestUrl = request.getRequestURL().toString();
String base = requestUrl.substring(0, requestUrl.lastIndexOf('/'));
- String params = "&hitsPerPage="+hitsPerPage
- +(sort==null ? "" : "&sort="+sort+(reverse?"&reverse=true":""));
try {
@@ -151,7 +168,7 @@
base+"/search.jsp"
+"?query="+urlQuery
+"&start="+start
- +"&hitsPerSite="+hitsPerSite
+ +"&hitsPerDup="+hitsPerDup
+params);
addNode(doc, channel, "opensearch", "totalResults", ""+hits.getTotal());
@@ -166,14 +183,14 @@
addNode(doc, channel, "nutch", "nextPage", requestUrl
+"?query="+urlQuery
+"&start="+end
- +"&hitsPerSite="+hitsPerSite
+ +"&hitsPerDup="+hitsPerDup
+params);
}
if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) {
addNode(doc, channel, "nutch", "showAllHits", requestUrl
+"?query="+urlQuery
- +"&hitsPerSite="+0
+ +"&hitsPerDup="+0
+params);
}