You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2006/05/04 18:37:26 UTC
svn commit: r399758 - in /lucene/nutch/trunk/contrib/web2/src/main:
java/org/apache/nutch/webapp/controller/CachedController.java
resources/org/nutch/jsp/resources_en.properties
webapp/WEB-INF/jsp/cached.jsp webapp/WEB-INF/tiles-defs.xml
Author: siren
Date: Thu May 4 09:37:25 2006
New Revision: 399758
URL: http://svn.apache.org/viewcvs?rev=399758&view=rev
Log:
fixed cached page
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml
Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java Thu May 4 09:37:25 2006
@@ -16,16 +16,17 @@
package org.apache.nutch.webapp.controller;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
+import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.NutchBean;
-import org.apache.nutch.searcher.Query;
import org.apache.nutch.webapp.common.ServiceLocator;
import org.apache.struts.tiles.ComponentContext;
@@ -34,17 +35,56 @@
public void nutchPerform(ComponentContext tileContext,
HttpServletRequest request, HttpServletResponse response,
ServletContext servletContext) throws ServletException, IOException {
+ // Looks up the hit named by the "idx" and "id" request parameters and exposes its
+ // cached content, content type, url and id as request attributes for cached.jsp.
ServiceLocator locator = getServiceLocator(request);
NutchBean bean = locator.getNutchBean();
- Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
- .parseInt(request.getParameter("id")));
+ LOG.info("Cache request from " + request.getRemoteAddr());
+
+ Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
+ Integer.parseInt(request.getParameter("id")));
+
HitDetails details = bean.getDetails(hit);
- Query query = Query.parse(request.getParameter("query"), locator
- .getConfiguration());
+ String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
- request.setAttribute("explanation", bean.getExplanation(query, hit));
- request.setAttribute("hitDetails", details);
- logRequestAttributes(request);
+ Metadata metaData = bean.getParseData(details).getContentMeta();
+
+ String content = null;
+ String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
+
+ // NOTE(review): contentType is dereferenced below without a null check - confirm the metadata always carries a content type.
+ if (contentType.startsWith("text/html")) {
+ // FIXME: it would be better to emit the original byte sequence
+ // with 'charset' set to the value of 'CharEncoding',
+ // but it is unclear how to emit a raw byte sequence from a JSP.
+ // out.getOutputStream().write(bean.getContent(details)) may work,
+ // but that has not been verified.
+ String encoding = (String) metaData.get("CharEncodingForConversion");
+ if (encoding != null) {
+ try {
+ content = new String(bean.getContent(details), encoding);
+ }
+ catch (UnsupportedEncodingException e) {
+ // unsupported encoding name: fall back to the configured default charset
+ content = new String(bean.getContent(details), locator.getConfiguration().get("parser.character.encoding.default"));
+ }
+ }
+ else {
+ // no conversion encoding in the metadata: decode with the platform default charset
+ content = new String(bean.getContent(details));
+ }
+ }
+
+ // decoded page content (stays null unless the content type starts with text/html)
+ request.setAttribute("content", content);
+ // page content type as read from the content metadata
+ request.setAttribute("contentType", contentType);
+ // page url taken from the hit details
+ request.setAttribute("url", details.getValue("url"));
+ // page id as a query-string fragment: idx=<index no>&id=<index doc no>
+ request.setAttribute("id", id);
+ // flag telling the view whether the cached content is html
+ request.setAttribute("isHtml", new Boolean(contentType.startsWith("text/html")));
}
}
Modified: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties Thu May 4 09:37:25 2006
@@ -14,6 +14,7 @@
cached.title=nutch cache
cached.page=page: <a href="{0}">{0}</a>
cached.noContent=Sorry, no content is cached for this page.
+cached.notHtml=The cached content has mime type {0}, click this <a href="servlet/cached?{1}">link</a> to download it directly.
#explain page title
explain.title=score explanation
Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp Thu May 4 09:37:25 2006
@@ -1,67 +1,28 @@
-<%@ page
- session="false"
- contentType="text/html; charset=UTF-8"
- import="java.io.*"
- import="java.util.*"
-
- import="org.apache.nutch.searcher.*"
- import="org.apache.nutch.parse.ParseData"
- import="org.apache.nutch.metadata.Metadata"
- import="org.apache.hadoop.conf.Configuration"
- import="org.apache.nutch.util.NutchConfiguration"
-%><%@ taglib prefix="bean" uri="/tags/struts-bean" %><%
- Configuration nutchConf = (Configuration) application.getAttribute(Configuration.class.getName());
- if (nutchConf == null) {
- nutchConf = NutchConfiguration.create();
- application.setAttribute(Configuration.class.getName(), nutchConf);
- }
- NutchBean bean = NutchBean.get(application, nutchConf);
- bean.LOG.info("cache request from " + request.getRemoteAddr());
- Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
- Integer.parseInt(request.getParameter("id")));
- HitDetails details = bean.getDetails(hit);
- String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
-
- Metadata metaData = bean.getParseData(details).getContentMeta();
-
- String content = null;
- String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
- if (contentType.startsWith("text/html")) {
- // FIXME : it's better to emit the original 'byte' sequence
- // with 'charset' set to the value of 'CharEncoding',
- // but I don't know how to emit 'byte sequence' in JSP.
- // out.getOutputStream().write(bean.getContent(details)) may work,
- // but I'm not sure.
- String encoding = (String) metaData.get("CharEncodingForConversion");
- if (encoding != null) {
- try {
- content = new String(bean.getContent(details), encoding);
- }
- catch (UnsupportedEncodingException e) {
- // fallback to windows-1252
- content = new String(bean.getContent(details), "windows-1252");
- }
- }
- else
- content = new String(bean.getContent(details));
- }
-%>
-<base href="<%=details.getValue("url")%>">
+<%@ include file="common.jsp"%>
+<c:if test="${isHtml}">
+ <base href="<c:out value="${url}"/>">
+</c:if>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<h2 style="{color: rgb(255, 153, 0)}"><bean:message key="cached.title"/></h2>
-<h3><bean:message key="cached.page" arg0="<%=details.getValue("url")%>" /></h3>
-<hr>
-<% if (contentType.startsWith("text/html")) {%>
-
-<% if (content != null && !content.equals("")) {%>
-<%= content %>
-<% } else { %>
-<bean:message key="cached.noContent"/>
-<% } %>
-
-<% } else { %>
-
-The cached content has mime type "<%=contentType%>",
-click this <a href="servlet/cached?<%=id%>">link</a> to download it directly.
-
-<% } %>
+<h2 style="{color: rgb(255, 153, 0)}"><fmt:message key="cached.title" /></h2>
+<h3><fmt:message key="cached.page">
+ <fmt:param value="${url}" />
+</fmt:message></h3>
+<hr />
+<c:choose>
+ <c:when test="${isHtml}">
+ <c:choose>
+ <c:when test="${content!=null && content!=''}">
+ <c:out value="${content}" escapeXml="false"/>
+ </c:when>
+ <c:otherwise>
+ <fmt:message key="cached.noContent" />
+ </c:otherwise>
+ </c:choose>
+ </c:when>
+ <c:otherwise>
+ <fmt:message key="cached.notHtml">
+ <fmt:param value="${contentType}" />
+ <fmt:param value="${id}" />
+ </fmt:message>
+ </c:otherwise>
+</c:choose>
Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml Thu May 4 09:37:25 2006
@@ -17,7 +17,8 @@
<put name="pageBody" value="/WEB-INF/jsp/search.jsp" />
</definition>
<!-- Cached -->
- <definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp">
+ <definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp"
+ controllerClass="org.apache.nutch.webapp.controller.CachedController">
<put name="title" value="cached.title" />
</definition>
<!-- Explain -->