You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2009/01/02 22:38:59 UTC
svn commit: r730845 - in /lucene/nutch/trunk: ./ conf/ lib/
src/java/org/apache/nutch/searcher/response/ src/plugin/
src/plugin/nutch-extensionpoints/ src/plugin/response-json/
src/plugin/response-json/lib/ src/plugin/response-json/src/
src/plugin/resp...
Author: kubes
Date: Fri Jan 2 13:38:58 2009
New Revision: 730845
URL: http://svn.apache.org/viewvc?rev=730845&view=rev
Log:
NUTCH-594: Serve Nutch search results in multiple formats including XML and JSON.
Added:
lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar (with props)
lucene/nutch/trunk/lib/commons-collections-3.2.1.jar (with props)
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
lucene/nutch/trunk/src/plugin/response-json/
lucene/nutch/trunk/src/plugin/response-json/build.xml
lucene/nutch/trunk/src/plugin/response-json/lib/
lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar (with props)
lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar (with props)
lucene/nutch/trunk/src/plugin/response-json/plugin.xml
lucene/nutch/trunk/src/plugin/response-json/src/
lucene/nutch/trunk/src/plugin/response-json/src/java/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
lucene/nutch/trunk/src/plugin/response-xml/
lucene/nutch/trunk/src/plugin/response-xml/build.xml
lucene/nutch/trunk/src/plugin/response-xml/plugin.xml
lucene/nutch/trunk/src/plugin/response-xml/src/
lucene/nutch/trunk/src/plugin/response-xml/src/java/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/build.xml
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/plugin/build.xml
lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
lucene/nutch/trunk/src/web/web.xml
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Jan 2 13:38:58 2009
@@ -300,6 +300,9 @@
111. NUTCH-646 - New Indexing Framework for Nutch. (kubes)
112. NUTCH-668 - Domain URL Filter. (kubes)
+
+113. NUTCH-594 - Serve Nutch search results in multiple formats including
+ XML and JSON. (kubes)
Release 0.9 - 2007-04-02
Modified: lucene/nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/build.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Fri Jan 2 13:38:58 2009
@@ -172,32 +172,34 @@
<outputproperty name="indent" value="yes"/>
</xslt>
<war destfile="${build.dir}/${final.name}.war"
- webxml="${web.src.dir}/web.xml">
+ webxml="${web.src.dir}/web.xml">
<fileset dir="${web.src.dir}/jsp"/>
<zipfileset dir="${docs.src}" includes="include/*.html"/>
<zipfileset dir="${build.docs}" includes="*/include/*.html"/>
<fileset dir="${docs.dir}"/>
<lib dir="${lib.dir}">
- <include name="lucene*.jar"/>
- <include name="taglibs-*.jar"/>
- <include name="hadoop-*.jar"/>
- <include name="dom4j-*.jar"/>
- <include name="xerces-*.jar"/>
- <include name="tika-*.jar"/>
+ <include name="lucene*.jar"/>
+ <include name="taglibs-*.jar"/>
+ <include name="hadoop-*.jar"/>
+ <include name="dom4j-*.jar"/>
+ <include name="xerces-*.jar"/>
+ <include name="tika-*.jar"/>
+ <include name="commons-collections-*.jar"/>
+ <include name="commons-beanutils-*.jar"/>
<include name="commons-cli-*.jar"/>
<include name="commons-lang-*.jar"/>
<include name="commons-logging-*.jar"/>
<include name="log4j-*.jar"/>
</lib>
<lib dir="${build.dir}">
- <include name="${final.name}.jar"/>
+ <include name="${final.name}.jar"/>
</lib>
<classes dir="${conf.dir}" excludes="**/*.template"/>
<classes dir="${web.src.dir}/locale"/>
<classes file="${web.src.dir}/log4j.properties"/>
<zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
<webinf dir="${lib.dir}">
- <include name="taglibs-*.tld"/>
+ <include name="taglibs-*.tld"/>
</webinf>
</war>
</target>
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Fri Jan 2 13:38:58 2009
@@ -886,7 +886,7 @@
<property>
<name>plugin.includes</name>
- <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
+ <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
<description>Regular expression naming plugin directory names to
include. Any plugin not matching this expression is excluded.
In any case you need at least include the nutch-extensionpoints plugin. By
@@ -1209,4 +1209,63 @@
</description>
</property>
+<!-- response writer properties -->
+
+<property>
+ <name>search.response.default.type</name>
+ <value>xml</value>
+ <description>
+ The default response type returned if none is specified.
+ </description>
+</property>
+
+<property>
+ <name>search.response.default.lang</name>
+ <value>en</value>
+ <description>
+ The default response language if none is specified.
+ </description>
+</property>
+
+<property>
+ <name>search.response.default.numrows</name>
+ <value>10</value>
+ <description>
+ The default number of rows to return if none is specified.
+ </description>
+</property>
+
+<property>
+ <name>search.response.default.dedupfield</name>
+ <value>site</value>
+ <description>
+ The default dedup field if none is specified.
+ </description>
+</property>
+
+<property>
+ <name>search.response.default.numdupes</name>
+ <value>1</value>
+ <description>
+ The default number of duplicates returned if none is specified.
+ </description>
+</property>
+
+<property>
+ <name>searcher.response.maxage</name>
+ <value>86400</value>
+ <description>
+ The maxage of a response in seconds. Used in caching headers.
+ </description>
+</property>
+
+<property>
+ <name>searcher.response.prettyprint</name>
+ <value>true</value>
+ <description>
+ Should the response output be pretty printed. Setting to true enables better
+ debugging, false removes unneeded spaces and gives better throughput.
+ </description>
+</property>
+
</configuration>
Added: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-collections-3.2.1.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,72 @@
+package org.apache.nutch.searcher.response;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * A set of utility methods for reading typed values out of request
+ * parameters.
+ */
+public class RequestUtils {
+
+  /**
+   * Checks whether the request carries the given parameter.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   *
+   * @return True if the parameter is present, even with an empty value.
+   */
+  public static boolean parameterExists(HttpServletRequest request, String param) {
+    return request.getParameter(param) != null;
+  }
+
+  /**
+   * Reads a parameter as an Integer.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   *
+   * @return The parsed value, or null if the parameter is missing, blank, not
+   * made up solely of digits, or too large to fit into an int.
+   */
+  public static Integer getIntegerParameter(HttpServletRequest request,
+    String param) {
+    String value = request.getParameter(param);
+    if (StringUtils.isNotBlank(value) && StringUtils.isNumeric(value)) {
+      try {
+        return Integer.valueOf(value);
+      }
+      catch (NumberFormatException e) {
+        // all digits but outside the int range, treat it like any other
+        // unusable value instead of failing the whole request
+        return null;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Reads a parameter as an Integer, falling back to a default.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   * @param def The value returned when no usable integer was supplied.
+   *
+   * @return The parsed value or the default.
+   */
+  public static Integer getIntegerParameter(HttpServletRequest request,
+    String param, Integer def) {
+    Integer value = getIntegerParameter(request, param);
+    return (value == null) ? def : value;
+  }
+
+  /**
+   * Reads a parameter as a String.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   *
+   * @return The parameter value or null if the parameter is missing.
+   */
+  public static String getStringParameter(HttpServletRequest request,
+    String param) {
+    return request.getParameter(param);
+  }
+
+  /**
+   * Reads a parameter as a String, falling back to a default.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   * @param def The value returned when the parameter is missing.
+   *
+   * @return The parameter value or the default.
+   */
+  public static String getStringParameter(HttpServletRequest request,
+    String param, String def) {
+    String value = getStringParameter(request, param);
+    return (value == null) ? def : value;
+  }
+
+  /**
+   * Reads a parameter as a Boolean. The values 1, true and yes (ignoring
+   * case) are read as true.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   *
+   * @return True if the parameter value is one of the accepted true values,
+   * false if it is anything else or the parameter is missing.
+   */
+  public static Boolean getBooleanParameter(HttpServletRequest request,
+    String param) {
+    return isTrueValue(request.getParameter(param));
+  }
+
+  /**
+   * Reads a parameter as a Boolean, falling back to a default. The values 1,
+   * true and yes (ignoring case) are read as true.
+   *
+   * @param request The HttpServletRequest object.
+   * @param param The name of the parameter.
+   * @param def The value returned when the parameter is missing.
+   *
+   * @return The default if the parameter is missing, otherwise whether the
+   * parameter value is one of the accepted true values.
+   */
+  public static Boolean getBooleanParameter(HttpServletRequest request,
+    String param, Boolean def) {
+    String value = request.getParameter(param);
+    if (value == null) {
+      return def;
+    }
+    return isTrueValue(value);
+  }
+
+  /**
+   * Decides whether a parameter value stands for true. The check is made on
+   * the value of the parameter, never on its name.
+   */
+  private static boolean isTrueValue(String value) {
+    return StringUtils.equals(value, "1")
+      || StringUtils.equalsIgnoreCase(value, "true")
+      || StringUtils.equalsIgnoreCase(value, "yes");
+  }
+}
Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,43 @@
+package org.apache.nutch.searcher.response;
+
+import java.io.IOException;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.nutch.plugin.Pluggable;
+
+/**
+ * Nutch extension point which allows writing search results in many different
+ * output formats.
+ */
+public interface ResponseWriter
+ extends Pluggable, Configurable {
+
+ public final static String X_POINT_ID = ResponseWriter.class.getName();
+
+ /**
+ * Sets the returned content MIME type. Populated through variables set in
+ * the plugin.xml file of the ResponseWriter. This allows easily changing
+ * output content types, for example for JSON from text/plain during testing
+ * and debugging to application/json in production.
+ *
+ * @param contentType The MIME content type to set.
+ */
+ public void setContentType(String contentType);
+
+ /**
+ * Writes out the search results response to the HttpServletResponse.
+ *
+ * @param results The SearchResults object containing hits and other info.
+ * @param request The HttpServletRequest object.
+ * @param response The HttpServletResponse object.
+ *
+ * @throws IOException If an error occurs while writing out the response.
+ */
+ public void writeResponse(SearchResults results, HttpServletRequest request,
+ HttpServletResponse response)
+ throws IOException;
+
+}
Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,90 @@
+package org.apache.nutch.searcher.response;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.ExtensionPoint;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.nutch.plugin.PluginRuntimeException;
+import org.apache.nutch.util.ObjectCache;
+
+/**
+ * Utility class for getting all ResponseWriter implementations and for
+ * returning the correct ResponseWriter for a given request type.
+ */
+public class ResponseWriters {
+
+  private Map<String, ResponseWriter> responseWriters;
+
+  /**
+   * Constructor that configures the cache of ResponseWriter objects. The
+   * writers are looked up in the ObjectCache of the configuration first and
+   * are only loaded from the plugin repository when nothing is cached yet.
+   *
+   * @param conf The Nutch configuration object.
+   *
+   * @throws RuntimeException If the ResponseWriter extension point is not
+   * found, an extension cannot be instantiated, or an extension declares no
+   * responseType attribute.
+   */
+  public ResponseWriters(Configuration conf) {
+
+    // get the cache and the cache key
+    String cacheKey = ResponseWriter.class.getName();
+    ObjectCache objectCache = ObjectCache.get(conf);
+
+    // only this class stores a value under the key and it is always a map
+    // of response type to ResponseWriter
+    @SuppressWarnings("unchecked")
+    Map<String, ResponseWriter> writers =
+      (Map<String, ResponseWriter>)objectCache.getObject(cacheKey);
+
+    // if already populated do nothing, otherwise load and cache the writers,
+    // an empty map is cached when no writer is configured
+    if (writers == null) {
+      writers = loadWriters(conf);
+      objectCache.setObject(cacheKey, writers);
+    }
+    this.responseWriters = writers;
+  }
+
+  /**
+   * Instantiates every ResponseWriter extension and maps each response type
+   * it declares to the writer instance.
+   */
+  private static Map<String, ResponseWriter> loadWriters(Configuration conf) {
+
+    Map<String, ResponseWriter> writers = new HashMap<String, ResponseWriter>();
+    try {
+
+      // get the extension point and all ResponseWriter extensions
+      ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        ResponseWriter.X_POINT_ID);
+      if (point == null) {
+        throw new RuntimeException(ResponseWriter.X_POINT_ID + " not found.");
+      }
+
+      // populate content type on the ResponseWriter classes, each response
+      // writer can handle more than one response type
+      for (Extension extension : point.getExtensions()) {
+        ResponseWriter writer = (ResponseWriter)extension.getExtensionInstance();
+        String responseTypes = extension.getAttribute("responseType");
+        if (responseTypes == null) {
+          throw new RuntimeException("No responseType attribute set for "
+            + writer.getClass().getName());
+        }
+        writer.setContentType(extension.getAttribute("contentType"));
+
+        // tolerate spaces around the commas, "xml, json" registers json
+        for (String responseType : responseTypes.split(",")) {
+          String trimmed = responseType.trim();
+          if (trimmed.length() > 0) {
+            writers.put(trimmed, writer);
+          }
+        }
+      }
+    }
+    catch (PluginRuntimeException e) {
+      throw new RuntimeException(e);
+    }
+    return writers;
+  }
+
+  /**
+   * Return the correct ResponseWriter object for the response type.
+   *
+   * @param respType The response type, such as xml or json. Must correspond to
+   * the value set in the plugin.xml file for the ResponseWriter extension.
+   *
+   * @return The ResponseWriter that handles that response type or null if no
+   * such object exists.
+   */
+  public ResponseWriter getResponseWriter(String respType) {
+    return responseWriters.get(respType);
+  }
+}
Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,140 @@
+package org.apache.nutch.searcher.response;
+
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+
+/**
+ * Plain bean carrying the request settings and the outcome of a search. It
+ * is the object handed to ResponseWriter.writeResponse. The class only stores
+ * the values, it performs no validation or computation of its own.
+ */
+public class SearchResults {
+
+ // names of the fields to return, stays null unless setFields is called
+ private String[] fields;
+ // response type of the request, such as xml or json
+ private String responseType;
+ // the query string
+ private String query;
+ // the language used for the query
+ private String lang;
+ // name of the field to sort by, if any
+ private String sort;
+ // whether the sort order is reversed
+ private boolean reverse;
+ // whether summaries are part of the results, true unless set otherwise
+ private boolean withSummary = true;
+ // index of the first hit
+ private int start;
+ // number of rows
+ private int rows;
+ // end index of the hits
+ private int end;
+ // total number of hits for the query
+ private long totalHits;
+ // hits, their details and their summaries
+ private Hit[] hits;
+ private HitDetails[] details;
+ private Summary[] summaries;
+
+ /**
+ * Creates an empty results object, all values are set through the setters.
+ */
+ public SearchResults() {
+
+ }
+
+ public String[] getFields() {
+ return fields;
+ }
+
+ public void setFields(String[] fields) {
+ this.fields = fields;
+ }
+
+ public boolean isWithSummary() {
+ return withSummary;
+ }
+
+ public void setWithSummary(boolean withSummary) {
+ this.withSummary = withSummary;
+ }
+
+ public String getResponseType() {
+ return responseType;
+ }
+
+ public void setResponseType(String responseType) {
+ this.responseType = responseType;
+ }
+
+ public String getQuery() {
+ return query;
+ }
+
+ public void setQuery(String query) {
+ this.query = query;
+ }
+
+ public String getLang() {
+ return lang;
+ }
+
+ public void setLang(String lang) {
+ this.lang = lang;
+ }
+
+ public String getSort() {
+ return sort;
+ }
+
+ public void setSort(String sort) {
+ this.sort = sort;
+ }
+
+ public boolean isReverse() {
+ return reverse;
+ }
+
+ public void setReverse(boolean reverse) {
+ this.reverse = reverse;
+ }
+
+ public int getStart() {
+ return start;
+ }
+
+ public void setStart(int start) {
+ this.start = start;
+ }
+
+ public int getRows() {
+ return rows;
+ }
+
+ public void setRows(int rows) {
+ this.rows = rows;
+ }
+
+ public int getEnd() {
+ return end;
+ }
+
+ public void setEnd(int end) {
+ this.end = end;
+ }
+
+ public long getTotalHits() {
+ return totalHits;
+ }
+
+ public void setTotalHits(long totalHits) {
+ this.totalHits = totalHits;
+ }
+
+ public Hit[] getHits() {
+ return hits;
+ }
+
+ public void setHits(Hit[] hits) {
+ this.hits = hits;
+ }
+
+ public HitDetails[] getDetails() {
+ return details;
+ }
+
+ public void setDetails(HitDetails[] details) {
+ this.details = details;
+ }
+
+ public Summary[] getSummaries() {
+ return summaries;
+ }
+
+ public void setSummaries(Summary[] summaries) {
+ this.summaries = summaries;
+ }
+
+}
Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,196 @@
+package org.apache.nutch.searcher.response;
+
+import java.io.IOException;
+
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Hits;
+import org.apache.nutch.searcher.NutchBean;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * Servlet that runs a search and returns the results in one of several
+ * output formats. The format is produced by a ResponseWriter obtained through
+ * the ResponseWriter Nutch extension point.
+ *
+ * @see org.apache.nutch.searcher.response.ResponseWriter
+ */
+public class SearchServlet
+  extends HttpServlet {
+
+  public static final Log LOG = LogFactory.getLog(SearchServlet.class);
+
+  // names of the request parameters read by this servlet
+  public static final String RESPONSE_TYPE = "rt";
+  public static final String QUERY = "query";
+  public static final String LANG = "lang";
+  public static final String START = "start";
+  public static final String ROWS = "rows";
+  public static final String SORT = "sort";
+  public static final String REVERSE = "reverse";
+  public static final String DEDUPE = "ddf";
+  public static final String NUM_DUPES = "dupes";
+  public static final String SUMMARY = "summary";
+  public static final String FIELDS = "field";
+
+  private NutchBean bean;
+  private Configuration conf;
+  private ResponseWriters writers;
+
+  // values used when a request leaves a parameter out, replaced by the
+  // configured values in init
+  private String defaultRespType = "xml";
+  private String defaultLang = null;
+  private int defaultNumRows = 10;
+  private String defaultDedupField = "site";
+  private int defaultNumDupes = 1;
+
+  /**
+   * Reads the default values from the configuration and obtains the
+   * NutchBean and the ResponseWriters used for all later requests.
+   */
+  public void init(ServletConfig config)
+    throws ServletException {
+
+    super.init(config);
+    try {
+      conf = NutchConfiguration.get(config.getServletContext());
+
+      // configured defaults, each with a built in fallback except the language
+      defaultRespType = conf.get("search.response.default.type", "xml");
+      defaultLang = conf.get("search.response.default.lang");
+      defaultNumRows = conf.getInt("search.response.default.numrows", 10);
+      defaultDedupField = conf.get("search.response.default.dedupfield",
+        "site");
+      defaultNumDupes = conf.getInt("search.response.default.numdupes", 1);
+
+      // search bean and response writers are obtained once and kept
+      bean = NutchBean.get(config.getServletContext(), conf);
+      writers = new ResponseWriters(conf);
+    }
+    catch (IOException ioe) {
+      throw new ServletException(ioe);
+    }
+  }
+
+  /**
+   * Treats a POST exactly like a GET.
+   */
+  protected void doPost(HttpServletRequest request, HttpServletResponse response)
+    throws ServletException, IOException {
+    doGet(request, response);
+  }
+
+  /**
+   * Handles a search request. Reads the request parameters, runs the search,
+   * collects the hits, details and optionally the summaries, and passes them
+   * to the ResponseWriter of the requested response type, which writes
+   * directly to the HttpServletResponse.
+   */
+  protected void doGet(HttpServletRequest request, HttpServletResponse response)
+    throws ServletException, IOException {
+
+    if (NutchBean.LOG.isInfoEnabled()) {
+      NutchBean.LOG.info("Query request from " + request.getRemoteAddr());
+    }
+
+    // the response type decides which ResponseWriter is used
+    String respType = RequestUtils.getStringParameter(request, RESPONSE_TYPE,
+      defaultRespType);
+    ResponseWriter writer = writers.getResponseWriter(respType);
+    if (writer == null) {
+      throw new IOException("Unknown response type " + respType);
+    }
+
+    // a query is mandatory
+    String query = RequestUtils.getStringParameter(request, QUERY);
+    if (StringUtils.isBlank(query)) {
+      throw new IOException("Query cannot be empty!");
+    }
+
+    String lang = resolveLanguage(request);
+
+    // remaining search parameters, fields restricts the returned fields
+    String sort = RequestUtils.getStringParameter(request, SORT);
+    boolean reverse = RequestUtils.getBooleanParameter(request, REVERSE, false);
+    int start = RequestUtils.getIntegerParameter(request, START, 0);
+    int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows);
+    String dedup = RequestUtils.getStringParameter(request, DEDUPE,
+      defaultDedupField);
+    int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES,
+      defaultNumDupes);
+    boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY,
+      true);
+    String[] fields = request.getParameterValues(FIELDS);
+
+    // parse out the query
+    Query queryObj = Query.parse(query, lang, this.conf);
+    if (NutchBean.LOG.isInfoEnabled()) {
+      NutchBean.LOG.info("query: " + query);
+      NutchBean.LOG.info("lang: " + lang);
+    }
+
+    Hits hits = searchOrEmpty(queryObj, start + rows, numDupes, dedup, sort,
+      reverse);
+
+    // work out the window of hits to show and fetch their details
+    long totalHits = hits.getTotal();
+    int end = (int)Math.min(hits.getLength(), start + rows);
+    int numHits = 0;
+    if (end > start) {
+      numHits = end - start;
+    }
+    Hit[] show = hits.getHits(start, numHits);
+    HitDetails[] details = bean.getDetails(show);
+
+    // collect everything the response writer needs
+    SearchResults results = new SearchResults();
+    results.setResponseType(respType);
+    results.setQuery(query);
+    results.setLang(lang);
+    results.setSort(sort);
+    results.setReverse(reverse);
+    results.setStart(start);
+    results.setRows(rows);
+    results.setEnd(end);
+    results.setTotalHits(totalHits);
+    results.setHits(show);
+    results.setDetails(details);
+
+    // summaries are only fetched when asked for, this avoids a network hit
+    results.setWithSummary(withSummary);
+    if (withSummary) {
+      results.setSummaries(bean.getSummary(details, queryObj));
+    }
+
+    // without explicit fields all fields are returned
+    if (fields != null && fields.length > 0) {
+      results.setFields(fields);
+    }
+
+    // the response writer writes the content to the HttpResponse directly
+    writer.writeResponse(results, request, response);
+  }
+
+  /**
+   * Picks the language from the request parameter, then from the request
+   * locale, then from the configured default.
+   */
+  private String resolveLanguage(HttpServletRequest request) {
+    String lang = RequestUtils.getStringParameter(request, LANG);
+    if (!StringUtils.isBlank(lang)) {
+      return lang;
+    }
+    lang = request.getLocale().getLanguage();
+    return StringUtils.isBlank(lang) ? defaultLang : lang;
+  }
+
+  /**
+   * Runs the search. An IOException from the search is logged as a warning
+   * and answered with an empty set of hits.
+   */
+  private Hits searchOrEmpty(Query queryObj, int maxHits, int numDupes,
+    String dedup, String sort, boolean reverse) {
+    try {
+      return bean.search(queryObj, maxHits, numDupes, dedup, sort, reverse);
+    }
+    catch (IOException ioe) {
+      if (NutchBean.LOG.isWarnEnabled()) {
+        NutchBean.LOG.warn("Search Error", ioe);
+      }
+      return new Hits(0, new Hit[0]);
+    }
+  }
+}
Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Fri Jan 2 13:38:58 2009
@@ -68,6 +68,8 @@
<ant dir="query-site" target="deploy"/>
<ant dir="query-custom" target="deploy"/>
<ant dir="query-url" target="deploy"/>
+ <ant dir="response-json" target="deploy"/>
+ <ant dir="response-xml" target="deploy"/>
<ant dir="scoring-opic" target="deploy"/>
<ant dir="scoring-link" target="deploy"/>
<ant dir="summary-basic" target="deploy"/>
@@ -105,7 +107,7 @@
<ant dir="parse-pdf" target="test"/>
<ant dir="parse-rss" target="test"/>
<ant dir="feed" target="test"/>
- <!-- <ant dir="parse-rtf" target="test"/> -->
+ <!-- <ant dir="parse-rtf" target="test"/> -->
<ant dir="parse-swf" target="test"/>
<ant dir="parse-zip" target="test"/>
<ant dir="query-url" target="test"/>
@@ -168,6 +170,8 @@
<ant dir="query-site" target="clean"/>
<ant dir="query-url" target="clean"/>
<ant dir="query-custom" target="clean"/>
+ <ant dir="response-json" target="clean"/>
+ <ant dir="response-xml" target="clean"/>
<ant dir="scoring-opic" target="clean"/>
<ant dir="scoring-link" target="clean"/>
<ant dir="subcollection" target="clean"/>
Modified: lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Jan 2 13:38:58 2009
@@ -69,6 +69,10 @@
name="Nutch Analysis"/>
<extension-point
+ id="org.apache.nutch.searcher.response.ResponseWriter"
+ name="Nutch Search Results Response Writer"/>
+
+<extension-point
id="org.apache.nutch.searcher.Summarizer"
name="Nutch Summarizer"/>
Added: lucene/nutch/trunk/src/plugin/response-json/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/build.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-json/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-json/build.xml Fri Jan 2 13:38:58 2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="response-json" default="jar-core">
+
+ <import file="../build-plugin.xml"/>
+
+</project>
Added: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/src/plugin/response-json/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/plugin.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-json/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-json/plugin.xml Fri Jan 2 13:38:58 2009
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ ! JSON-lib is a java library for transforming beans, maps, collections, java
+ ! arrays and XML to JSON and back again. EZMorph is one of its dependencies.
+ ! Both are licensed under the Apache License.
+ !
+ ! JSON-lib Project: http://json-lib.sourceforge.net/index.html
+ ! JSON-lib Download: http://sourceforge.net/project/showfiles.php?group_id=171425
+ ! License: http://json-lib.sourceforge.net/license.html
+ !
+ ! EZMorph Project: http://ezmorph.sourceforge.net/index.html
+ ! EZMorph Download: http://sourceforge.net/project/showfiles.php?group_id=174866
+ ! License: http://ezmorph.sourceforge.net/license.html
+ !-->
+<plugin
+ id="response-json"
+ name="JSON Response Writer Plug-in"
+ version="1.0.0"
+ provider-name="nutch.org">
+
+ <runtime>
+ <library name="response-json.jar">
+ <export name="*"/>
+ </library>
+ <library name="ezmorph-1.0.6.jar"/>
+ <library name="json-lib-2.2.2-jdk15.jar"/>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints"/>
+ </requires>
+
+ <extension id="org.apache.nutch.searcher.response"
+ name="ResponseWriter"
+ point="org.apache.nutch.searcher.response.ResponseWriter">
+
+ <implementation id="org.apache.nutch.searcher.response.json.JSONResponseWriter"
+ class="org.apache.nutch.searcher.response.json.JSONResponseWriter">
+ <parameter name="responseType" value="json"/>
+ <!--<parameter name="contentType" value="application/json"/>-->
+ <parameter name="contentType" value="text/plain"/>
+ </implementation>
+
+ </extension>
+
+</plugin>
Added: lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java (added)
+++ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,141 @@
+package org.apache.nutch.searcher.response.json;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import net.sf.json.JSONArray;
+import net.sf.json.JSONObject;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.searcher.response.RequestUtils;
+import org.apache.nutch.searcher.response.ResponseWriter;
+import org.apache.nutch.searcher.response.SearchResults;
+
+/**
+ * A ResponseWriter implementation that returns search results in JSON format.
+ *
+ * <p>The response body is a single JSON object holding the search parameters
+ * and a "documents" array. When the request carries a non-blank "func"
+ * parameter, the object is wrapped in a call to that function.</p>
+ */
+public class JSONResponseWriter
+  implements ResponseWriter {
+
+  /** Character encoding used for the response body. */
+  private static final String ENCODING = "UTF-8";
+
+  private String contentType = null;
+  private Configuration conf;
+  private int maxAgeInSeconds;
+  private boolean prettyPrint = true;
+
+  /**
+   * Sets the content type sent with every response.
+   *
+   * @param contentType The value for the Content-Type header.
+   */
+  public void setContentType(String contentType) {
+    this.contentType = contentType;
+  }
+
+  /**
+   * @return The configuration last passed to {@link #setConf(Configuration)}.
+   */
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * Stores the configuration and reads the cache lifetime
+   * ("searcher.response.maxage", default 86400 seconds) and the pretty print
+   * flag ("searcher.response.prettyprint", default true) from it.
+   *
+   * @param conf The configuration to read the settings from.
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400);
+    this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true);
+  }
+
+  /**
+   * Serializes the search results as JSON and writes them to the response,
+   * together with the Cache-Control and Expires headers.
+   *
+   * @param results The search results to write out.
+   * @param request The request, read for the optional "func" parameter.
+   * @param response The response that receives the headers and the body.
+   *
+   * @throws IOException If the body cannot be written to the response.
+   */
+  public void writeResponse(SearchResults results, HttpServletRequest request,
+    HttpServletResponse response)
+    throws IOException {
+
+    // the function name, if any wrapping the JSON output
+    // NOTE(review): the value is taken from the request and echoed back as
+    // is; consider restricting it to identifier characters.
+    String func = RequestUtils.getStringParameter(request, "func");
+
+    // create the JSON object and add common values
+    JSONObject jsonObj = new JSONObject();
+    jsonObj.accumulate("query", results.getQuery());
+    jsonObj.accumulate("lang", results.getLang());
+    jsonObj.accumulate("sort", results.getSort());
+    jsonObj.accumulate("reverse", results.isReverse());
+    jsonObj.accumulate("start", results.getStart());
+    jsonObj.accumulate("end", results.getEnd());
+    jsonObj.accumulate("rows", results.getRows());
+    jsonObj.accumulate("totalhits", results.getTotalHits());
+    jsonObj.accumulate("withSummary", results.isWithSummary());
+
+    // an empty field set means "return every field"
+    String[] searchFields = results.getFields();
+    Set<String> fieldSet = new HashSet<String>();
+    if (searchFields != null && searchFields.length > 0) {
+      jsonObj.accumulate("fields", StringUtils.join(searchFields, ","));
+      for (String searchField : searchFields) {
+        fieldSet.add(searchField);
+      }
+    }
+
+    // add the documents from search hits
+    JSONArray docsAr = new JSONArray();
+    HitDetails[] details = results.getDetails();
+    Hit[] hits = results.getHits();
+    Summary[] summaries = results.getSummaries();
+    for (int i = 0; i < details.length; i++) {
+
+      // every document has an indexno and an indexdocno
+      JSONObject result = new JSONObject();
+      HitDetails detail = details[i];
+      Hit hit = hits[i];
+      result.accumulate("indexno", hit.getIndexNo());
+      result.accumulate("indexdocno", hit.getIndexDocNo());
+
+      // only add summaries when the results include them
+      if (summaries != null && results.isWithSummary()) {
+        result.accumulate("summary", summaries[i].toString());
+      }
+
+      // add the fields from hit details
+      JSONObject fields = new JSONObject();
+      Set<String> writtenNames = new HashSet<String>();
+      for (int k = 0; k < detail.getLength(); k++) {
+        String name = detail.getField(k);
+
+        // the values are looked up by name, so a name that shows up at more
+        // than one position must be written only once or its values would
+        // be accumulated again for every occurrence
+        if (!writtenNames.add(name)) {
+          continue;
+        }
+
+        // if we specified fields to return, only return those fields
+        if (fieldSet.size() == 0 || fieldSet.contains(name)) {
+          String[] values = detail.getValues(name);
+          JSONArray valuesAr = new JSONArray();
+          for (int m = 0; m < values.length; m++) {
+            valuesAr.add(values[m]);
+          }
+          fields.accumulate(name, valuesAr);
+        }
+      }
+      result.accumulate("fields", fields);
+      docsAr.add(result);
+    }
+
+    jsonObj.accumulate("documents", docsAr);
+
+    // pretty printing can be set through configuration, write out the wrapper
+    // function if there is one
+    boolean wrapInFunction = StringUtils.isNotBlank(func);
+    StringBuilder builder = new StringBuilder();
+    if (wrapInFunction) {
+      builder.append(func).append("(");
+    }
+    builder.append(prettyPrint ? jsonObj.toString(2) : jsonObj.toString());
+    if (wrapInFunction) {
+      builder.append(")");
+    }
+
+    // Cache control headers. The multiplication is done in long arithmetic so
+    // that large max-age values cannot overflow, and the container formats
+    // the Expires date so it does not depend on the server's default time
+    // zone or locale.
+    long relExpiresInMillis = System.currentTimeMillis()
+      + (1000L * maxAgeInSeconds);
+    response.setContentType(contentType);
+    response.setCharacterEncoding(ENCODING);
+    response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds);
+    response.setDateHeader("Expires", relExpiresInMillis);
+
+    // write out the content to the response, encoded with the charset that
+    // was declared above instead of the platform default
+    response.getOutputStream().write(builder.toString().getBytes(ENCODING));
+    response.flushBuffer();
+  }
+
+}
Added: lucene/nutch/trunk/src/plugin/response-xml/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/build.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-xml/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-xml/build.xml Fri Jan 2 13:38:58 2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="response-xml" default="jar-core">
+
+ <import file="../build-plugin.xml"/>
+
+</project>
Added: lucene/nutch/trunk/src/plugin/response-xml/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/plugin.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-xml/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-xml/plugin.xml Fri Jan 2 13:38:58 2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+ id="response-xml"
+ name="XML Response Writer Plug-in"
+ version="1.0.0"
+ provider-name="nutch.org">
+
+ <runtime>
+ <library name="response-xml.jar">
+ <export name="*"/>
+ </library>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints"/>
+ </requires>
+
+ <extension id="org.apache.nutch.searcher.response"
+ name="ResponseWriter"
+ point="org.apache.nutch.searcher.response.ResponseWriter">
+
+ <implementation id="org.apache.nutch.searcher.response.xml.XMLResponseWriter"
+ class="org.apache.nutch.searcher.response.xml.XMLResponseWriter">
+ <parameter name="responseType" value="xml"/>
+ <parameter name="contentType" value="text/xml"/>
+ </implementation>
+
+ </extension>
+
+</plugin>
Added: lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java (added)
+++ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java Fri Jan 2 13:38:58 2009
@@ -0,0 +1,267 @@
+package org.apache.nutch.searcher.response.xml;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.html.Entities;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.searcher.response.ResponseWriter;
+import org.apache.nutch.searcher.response.SearchResults;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+/**
+ * A ResponseWriter implementation that returns search results in XML format.
+ *
+ * <p>The document root is a "results" element with a "search" child holding
+ * the search parameters and a "documents" child holding one "document"
+ * element per hit.</p>
+ */
+public class XMLResponseWriter
+  implements ResponseWriter {
+
+  /** Character encoding used for the serialized document. */
+  private static final String ENCODING = "UTF-8";
+
+  private String contentType = null;
+  private Configuration conf;
+  private int maxAgeInSeconds;
+  private boolean prettyPrint;
+
+  /**
+   * Creates and returns a new node within the XML document.
+   *
+   * @param doc The XML document.
+   * @param parent The parent Node.
+   * @param name The name of the new node.
+   *
+   * @return The newly created node Element.
+   */
+  private static Element addNode(Document doc, Node parent, String name) {
+    Element child = doc.createElement(name);
+    parent.appendChild(child);
+    return child;
+  }
+
+  /**
+   * Creates a new node within the XML document. The node contains the text
+   * supplied, stripped of illegal XML characters, as a child text node.
+   *
+   * @param doc The XML document.
+   * @param parent The parent Node.
+   * @param name The name of the new node.
+   * @param text A text string to append as a child node.
+   */
+  private static void addNode(Document doc, Node parent, String name,
+    String text) {
+    Element child = doc.createElement(name);
+    child.appendChild(doc.createTextNode(getLegalXml(text)));
+    parent.appendChild(child);
+  }
+
+  /**
+   * Adds an attribute name and value to a node Element in the XML document.
+   * Illegal XML characters are stripped from the value.
+   *
+   * @param doc The XML document.
+   * @param node The node Element on which to attach the attribute.
+   * @param name The name of the attribute.
+   * @param value The value of the attribute.
+   */
+  private static void addAttribute(Document doc, Element node, String name,
+    String value) {
+    Attr attribute = doc.createAttribute(name);
+    attribute.setValue(getLegalXml(value));
+    node.getAttributes().setNamedItem(attribute);
+  }
+
+  /**
+   * Transforms and returns the text string as legal XML text by dropping
+   * every character that is not a legal XML character.
+   *
+   * <p>The text is scanned by code point rather than by char, so characters
+   * outside the basic multilingual plane (encoded as surrogate pairs) are
+   * kept, while unpaired surrogates are dropped.</p>
+   *
+   * @param text The text to transform, may be null.
+   *
+   * @return The text string in the form of legal XML text, the same String
+   * instance if nothing had to be removed, or null if the text was null.
+   */
+  protected static String getLegalXml(String text) {
+
+    if (text == null) {
+      return null;
+    }
+
+    // the buffer is only created once the first illegal character is found
+    StringBuilder buffer = null;
+    int i = 0;
+    while (i < text.length()) {
+      int codePoint = text.codePointAt(i);
+      if (!isLegalXml(codePoint)) {
+        if (buffer == null) {
+          buffer = new StringBuilder(text.length());
+          buffer.append(text.substring(0, i));
+        }
+      }
+      else if (buffer != null) {
+        buffer.appendCodePoint(codePoint);
+      }
+      i += Character.charCount(codePoint);
+    }
+    return (buffer != null) ? buffer.toString() : text;
+  }
+
+  /**
+   * Determines if the code point is a legal XML character.
+   *
+   * @param c The Unicode code point to check.
+   *
+   * @return True if the code point is legal xml, false otherwise.
+   */
+  private static boolean isLegalXml(final int c) {
+    return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff)
+      || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff);
+  }
+
+  /**
+   * Sets the content type sent with every response.
+   *
+   * @param contentType The value for the Content-Type header.
+   */
+  public void setContentType(String contentType) {
+    this.contentType = contentType;
+  }
+
+  /**
+   * @return The configuration last passed to {@link #setConf(Configuration)}.
+   */
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * Stores the configuration and reads the cache lifetime
+   * ("searcher.response.maxage", default 86400 seconds) and the pretty print
+   * flag ("searcher.response.prettyprint", default true) from it.
+   *
+   * @param conf The configuration to read the settings from.
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400);
+    this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true);
+  }
+
+  /**
+   * Serializes the search results as an XML document and writes it to the
+   * response, together with the Cache-Control and Expires headers.
+   *
+   * @param results The search results to write out.
+   * @param request The request, not read by this writer.
+   * @param response The response that receives the headers and the body.
+   *
+   * @throws IOException If the document cannot be built, serialized or
+   * written. Failures that are not IOExceptions are wrapped in one.
+   */
+  public void writeResponse(SearchResults results, HttpServletRequest request,
+    HttpServletResponse response)
+    throws IOException {
+
+    try {
+
+      // create the xml document and add the results and search nodes
+      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+      Document xmldoc = factory.newDocumentBuilder().newDocument();
+      Element resEl = addNode(xmldoc, xmldoc, "results");
+      Element searchEl = addNode(xmldoc, resEl, "search");
+
+      // add common nodes, lang and sort are optional
+      addNode(xmldoc, searchEl, "query", results.getQuery());
+      String lang = results.getLang();
+      if (lang != null) {
+        addNode(xmldoc, searchEl, "lang", lang);
+      }
+      String sort = results.getSort();
+      if (sort != null) {
+        addNode(xmldoc, searchEl, "sort", sort);
+      }
+      addNode(xmldoc, searchEl, "reverse",
+        String.valueOf(results.isReverse()));
+      addNode(xmldoc, searchEl, "start", String.valueOf(results.getStart()));
+      addNode(xmldoc, searchEl, "end", String.valueOf(results.getEnd()));
+      addNode(xmldoc, searchEl, "rows", String.valueOf(results.getRows()));
+      addNode(xmldoc, searchEl, "totalhits",
+        String.valueOf(results.getTotalHits()));
+      addNode(xmldoc, searchEl, "withSummary",
+        String.valueOf(results.isWithSummary()));
+
+      // an empty field set means "return every field"
+      String[] searchFields = results.getFields();
+      Set<String> fieldSet = new HashSet<String>();
+      if (searchFields != null && searchFields.length > 0) {
+        addNode(xmldoc, searchEl, "fields",
+          StringUtils.join(searchFields, ","));
+        for (String searchField : searchFields) {
+          fieldSet.add(searchField);
+        }
+      }
+
+      // add documents
+      Element documents = addNode(xmldoc, resEl, "documents");
+      HitDetails[] details = results.getDetails();
+      Hit[] hits = results.getHits();
+      Summary[] summaries = results.getSummaries();
+      for (int i = 0; i < details.length; i++) {
+
+        // every document has an indexno and an indexdocno
+        Element document = addNode(xmldoc, documents, "document");
+        addAttribute(xmldoc, document, "indexno",
+          String.valueOf(hits[i].getIndexNo()));
+        addAttribute(xmldoc, document, "indexdocno",
+          String.valueOf(hits[i].getIndexDocNo()));
+
+        // only add summaries when the results include them
+        // NOTE(review): the text is entity encoded here and then stored in a
+        // DOM text node, presumably escaping it a second time when the
+        // document is serialized; confirm that this is intended.
+        if (summaries != null && results.isWithSummary()) {
+          String encSumm = Entities.encode(summaries[i].toString());
+          addNode(xmldoc, document, "summary", encSumm);
+        }
+
+        // add the fields from hit details
+        Element fields = addNode(xmldoc, document, "fields");
+        HitDetails detail = details[i];
+        Set<String> writtenNames = new HashSet<String>();
+        for (int j = 0; j < detail.getLength(); j++) {
+          String fieldName = detail.getField(j);
+
+          // the values are looked up by name, so a name that shows up at
+          // more than one position must be written only once or the same
+          // field element would be repeated for every occurrence
+          if (!writtenNames.add(fieldName)) {
+            continue;
+          }
+
+          // if we specified fields to return, only return those fields
+          if (fieldSet.size() == 0 || fieldSet.contains(fieldName)) {
+            String[] fieldValues = detail.getValues(fieldName);
+            Element field = addNode(xmldoc, fields, "field");
+            addAttribute(xmldoc, field, "name", fieldName);
+            for (int k = 0; k < fieldValues.length; k++) {
+              String encFieldVal = Entities.encode(fieldValues[k]);
+              addNode(xmldoc, field, "value", encFieldVal);
+            }
+          }
+        }
+      }
+
+      // get the xml source and a transformer to print it out
+      DOMSource source = new DOMSource(xmldoc);
+      TransformerFactory transFactory = TransformerFactory.newInstance();
+      Transformer transformer = transFactory.newTransformer();
+      transformer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
+
+      // pretty printing can be set through configuration
+      if (prettyPrint) {
+        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+        transformer.setOutputProperty(
+          "{http://xml.apache.org/xslt}indent-amount", "2");
+      }
+
+      // write out the content to a byte array first, so that nothing is sent
+      // to the client if the transformation fails
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      transformer.transform(source, new StreamResult(baos));
+
+      // cache control headers. The multiplication is done in long arithmetic
+      // so that large max-age values cannot overflow, and the container
+      // formats the Expires date so it does not depend on the server's
+      // default time zone or locale.
+      long relExpiresInMillis = System.currentTimeMillis()
+        + (1000L * maxAgeInSeconds);
+      response.setContentType(contentType);
+      response.setCharacterEncoding(ENCODING);
+      response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds);
+      response.setDateHeader("Expires", relExpiresInMillis);
+
+      // write out the content to the response
+      response.getOutputStream().write(baos.toByteArray());
+      response.flushBuffer();
+    }
+    catch (IOException e) {
+      // already the declared type, do not wrap it a second time
+      throw e;
+    }
+    catch (Exception e) {
+      throw new IOException(e);
+    }
+
+  }
+}
Modified: lucene/nutch/trunk/src/web/web.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/web/web.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/web/web.xml (original)
+++ lucene/nutch/trunk/src/web/web.xml Fri Jan 2 13:38:58 2009
@@ -36,6 +36,11 @@
<servlet-class>org.apache.nutch.searcher.OpenSearchServlet</servlet-class>
</servlet>
+<servlet>
+ <servlet-name>SearchServlet</servlet-name>
+ <servlet-class>org.apache.nutch.searcher.response.SearchServlet</servlet-class>
+</servlet>
+
<servlet-mapping>
<servlet-name>Cached</servlet-name>
<url-pattern>/servlet/cached</url-pattern>
@@ -46,6 +51,11 @@
<url-pattern>/opensearch</url-pattern>
</servlet-mapping>
+<servlet-mapping>
+ <servlet-name>SearchServlet</servlet-name>
+ <url-pattern>/search</url-pattern>
+</servlet-mapping>
+
<welcome-file-list>
<welcome-file>search.html</welcome-file>
<welcome-file>index.html</welcome-file>