You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/04/13 00:11:41 UTC

svn commit: r161127 - in incubator/nutch/trunk/src: java/org/apache/nutch/searcher/OpenSearchServlet.java web/web.xml

Author: cutting
Date: Tue Apr 12 15:11:40 2005
New Revision: 161127

URL: http://svn.apache.org/viewcvs?view=rev&rev=161127
Log:
First version of an OpenSearch RSS servlet for search results.

Added:
    incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
Modified:
    incubator/nutch/trunk/src/web/web.xml

Added: incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java?view=auto&rev=161127
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java (added)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Tue Apr 12 15:11:40 2005
@@ -0,0 +1,213 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.searcher;
+
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.logging.Level;
+
+import javax.servlet.ServletException;
+import javax.servlet.ServletConfig;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpUtils;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import javax.xml.parsers.*;
+import org.w3c.dom.*;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.nutch.html.Entities;
+import org.apache.nutch.searcher.*;
+import org.apache.nutch.plugin.*;
+import org.apache.nutch.clustering.*;
+import org.apache.nutch.util.NutchConf;
+
+
+/**
+ * Presents search results as RSS 2.0 using A9's OpenSearch extensions, plus a
+ * few Nutch-specific extensions (links to the cached copy, to the score
+ * explanation and to further hits from the same site).
+ *
+ * <p>Request parameters read by <code>doGet</code>: <code>query</code>,
+ * <code>start</code> (default 0), <code>hitsPerPage</code> (default 10) and
+ * <code>hitsPerSite</code> (default 2).
+ */
+public class OpenSearchServlet extends HttpServlet {
+  /** Namespace URI of the A9 OpenSearch RSS extension elements. */
+  private static final String OPENSEARCH_NS =
+    "http://a9.com/-/spec/opensearchrss/1.0/";
+
+  /** Namespace URI of the Nutch-specific extension elements. */
+  private static final String NUTCH_NS =
+    "http://www.nutch.org/opensearchrss/1.0/";
+
+  /** Encoding used for request parameters, URL escaping and the response. */
+  private static final String ENCODING = "UTF-8";
+
+  /** Search back end, looked up from the servlet context in init(). */
+  private NutchBean bean;
+
+  /**
+   * Stores the servlet configuration and looks up the NutchBean for this web
+   * application.
+   *
+   * @param config the configuration supplied by the servlet container
+   * @throws ServletException if the bean cannot be obtained
+   */
+  public void init(ServletConfig config) throws ServletException {
+    // GenericServlet requires this call when init(ServletConfig) is
+    // overridden; without it getServletConfig()/getServletContext() fail.
+    super.init(config);
+    try {
+      bean = NutchBean.get(config.getServletContext());
+    } catch (IOException e) {
+      throw new ServletException(e);
+    }
+  }
+
+  /**
+   * Runs the query given in the request and writes the requested page of hits
+   * to the response as an RSS 2.0 document.
+   *
+   * <p>A search failure (IOException) is logged and reported as an empty
+   * result list. Missing or malformed numeric parameters fall back to their
+   * defaults; negative <code>start</code> and <code>hitsPerPage</code> values
+   * are treated as 0.
+   *
+   * @throws ServletException if the XML document cannot be built or written
+   * @throws IOException if reading the request or writing the response fails
+   */
+  public void doGet(HttpServletRequest request, HttpServletResponse response)
+    throws ServletException, IOException {
+
+    bean.LOG.info("query request from " + request.getRemoteAddr());
+
+    // get parameters from request
+    request.setCharacterEncoding(ENCODING);
+    String queryString = request.getParameter("query");
+    if (queryString == null)
+      queryString = "";
+
+    // first hit to display
+    int start = Math.max(0, getIntParameter(request, "start", 0));
+    // number of hits to display
+    int hitsPerPage = Math.max(0, getIntParameter(request, "hitsPerPage", 10));
+    // max hits per site
+    int hitsPerSite = getIntParameter(request, "hitsPerSite", 2);
+
+    Query query = Query.parse(queryString);
+    bean.LOG.info("query: " + queryString);
+
+    // execute the query
+    Hits hits;
+    try {
+      hits = bean.search(query, start + hitsPerPage, hitsPerSite);
+    } catch (IOException e) {
+      bean.LOG.log(Level.WARNING, "Search Error", e);
+      hits = new Hits(0,new Hit[0]);
+    }
+
+    bean.LOG.info("total hits: " + hits.getTotal());
+
+    // Clamp the requested window to the hits actually returned, so that a
+    // start offset past the last hit yields an empty page instead of a
+    // negative length.
+    int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
+    int first = Math.min(start, end);
+    int length = end - first;
+
+    Hit[] show = hits.getHits(first, length);
+    HitDetails[] details = bean.getDetails(show);
+    String[] summaries = bean.getSummary(details, query);
+
+    String requestUrl = HttpUtils.getRequestURL(request).toString();
+    String base = requestUrl.substring(0, requestUrl.lastIndexOf('/'));
+
+    // The query is embedded in URLs below, so it must be URL-encoded (not
+    // HTML-entity encoded); the XML serializer escapes markup characters.
+    String urlQuery = URLEncoder.encode(queryString, ENCODING);
+
+    try {
+      Document doc = DocumentBuilderFactory.newInstance()
+        .newDocumentBuilder().newDocument();
+
+      Element rss = addNode(doc, doc, "rss");
+      addAttribute(doc, rss, "version", "2.0");
+
+      Element channel = addNode(doc, rss, "channel");
+
+      addNode(doc, channel, "title", "Nutch: " + queryString);
+      addNode(doc, channel, "description", "Nutch search results for query: "
+              + queryString);
+      addNode(doc, channel, "link",
+              base+"/search.jsp"
+              +"?query="+urlQuery
+              +"&start="+start
+              +"&hitsPerPage="+hitsPerPage
+              +"&hitsPerSite="+hitsPerSite);
+
+      addNode(doc, channel, OPENSEARCH_NS, "totalResults", ""+hits.getTotal());
+      addNode(doc, channel, OPENSEARCH_NS, "startIndex", ""+start);
+      addNode(doc, channel, OPENSEARCH_NS, "itemsPerPage", ""+hitsPerPage);
+
+      for (int i = 0; i < length; i++) {
+        Hit hit = show[i];
+        HitDetails detail = details[i];
+        String title = detail.getValue("title");
+        String url = detail.getValue("url");
+        String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
+
+        if (title == null || title.equals(""))    // use url for docs w/o title
+          title = url;
+
+        Element item = addNode(doc, channel, "item");
+
+        addNode(doc, item, "title", title);
+        addNode(doc, item, "description", summaries[i]);
+        addNode(doc, item, "link", url);
+
+        // Per-hit extension elements belong to the item they describe, not to
+        // the channel, otherwise they cannot be matched to a hit.
+        addNode(doc, item, NUTCH_NS, "cache", base+"/cached.jsp?"+id);
+        addNode(doc, item, NUTCH_NS, "explain", base+"/explain.jsp?"+id
+                +"&query="+urlQuery);
+
+        if (hit.moreFromSiteExcluded()) {
+          String siteQuery = "site:"+hit.getSite()+" "+queryString;
+          addNode(doc, item, NUTCH_NS, "moreFromSite", base+"/search.jsp"
+                  +"?query="
+                  +URLEncoder.encode(siteQuery, ENCODING)
+                  +"&hitsPerPage="+hitsPerPage+"&hitsPerSite="+0);
+        }
+      }
+
+      // dump DOM tree
+
+      DOMSource source = new DOMSource(doc);
+      TransformerFactory transFactory = TransformerFactory.newInstance();
+      Transformer transformer = transFactory.newTransformer();
+      transformer.setOutputProperty("indent", "yes");
+      // Serialize as UTF-8 and say so in the header, so non-ASCII titles and
+      // summaries are decoded correctly by the client.
+      transformer.setOutputProperty("encoding", ENCODING);
+      response.setContentType("text/xml; charset=" + ENCODING);
+      StreamResult result = new StreamResult(response.getOutputStream());
+      transformer.transform(source, result);
+
+    } catch (javax.xml.parsers.ParserConfigurationException e) {
+      throw new ServletException(e);
+    } catch (javax.xml.transform.TransformerException e) {
+      throw new ServletException(e);
+    }
+
+  }
+
+  /**
+   * Reads an integer request parameter.
+   *
+   * @param request the request to read from
+   * @param name the parameter name
+   * @param defaultValue value to use when the parameter is unusable
+   * @return the parsed value, or <code>defaultValue</code> when the parameter
+   *         is missing or is not a valid integer
+   */
+  private static int getIntParameter(HttpServletRequest request, String name,
+                                     int defaultValue) {
+    String value = request.getParameter(name);
+    if (value == null)
+      return defaultValue;
+    try {
+      return Integer.parseInt(value.trim());
+    } catch (NumberFormatException e) {
+      // malformed input: fall back to the default instead of failing the
+      // whole request with an unchecked exception
+      return defaultValue;
+    }
+  }
+
+  /**
+   * Creates an empty element called <code>name</code>, appends it to
+   * <code>parent</code> and returns it.
+   */
+  private static Element addNode(Document doc, Node parent, String name) {
+    Element child = doc.createElement(name);
+    parent.appendChild(child);
+    return child;
+  }
+
+  /**
+   * Appends to <code>parent</code> a new element called <code>name</code>
+   * whose only child is a text node containing <code>text</code>.
+   */
+  private static void addNode(Document doc, Node parent,
+                              String name, String text) {
+    Element child = doc.createElement(name);
+    child.appendChild(doc.createTextNode(text));
+    parent.appendChild(child);
+  }
+
+  /**
+   * Appends to <code>parent</code> a new element called <code>name</code> in
+   * the namespace <code>nameSpace</code> (a namespace URI) whose only child is
+   * a text node containing <code>text</code>.
+   */
+  private static void addNode(Document doc, Node parent,
+                              String nameSpace, String name, String text) {
+    Element child = doc.createElementNS(nameSpace, name);
+    child.appendChild(doc.createTextNode(text));
+    parent.appendChild(child);
+  }
+
+  /**
+   * Sets the attribute <code>name</code> of <code>node</code> to
+   * <code>value</code>, replacing any existing attribute of that name.
+   */
+  private static void addAttribute(Document doc, Element node,
+                                   String name, String value) {
+    node.setAttribute(name, value);
+  }
+
+}
+

Modified: incubator/nutch/trunk/src/web/web.xml
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/web/web.xml?view=diff&r1=161126&r2=161127
==============================================================================
--- incubator/nutch/trunk/src/web/web.xml (original)
+++ incubator/nutch/trunk/src/web/web.xml Tue Apr 12 15:11:40 2005
@@ -13,9 +13,19 @@
   <servlet-class>org.apache.nutch.servlet.Cached</servlet-class>
 </servlet>
 
+<!-- Declares the OpenSearch RSS servlet that presents search results. -->
+<servlet>
+  <servlet-name>OpenSearch</servlet-name>
+  <servlet-class>org.apache.nutch.searcher.OpenSearchServlet</servlet-class>
+</servlet>
+
 <servlet-mapping>
   <servlet-name>Cached</servlet-name>
   <url-pattern>/servlet/cached</url-pattern>
+</servlet-mapping>
+
+<!-- Maps the OpenSearch servlet to the /opensearch URL pattern. -->
+<servlet-mapping>
+  <servlet-name>OpenSearch</servlet-name>
+  <url-pattern>/opensearch</url-pattern>
 </servlet-mapping>
 
 <welcome-file-list>