You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2006/05/04 18:37:26 UTC

svn commit: r399758 - in /lucene/nutch/trunk/contrib/web2/src/main: java/org/apache/nutch/webapp/controller/CachedController.java resources/org/nutch/jsp/resources_en.properties webapp/WEB-INF/jsp/cached.jsp webapp/WEB-INF/tiles-defs.xml

Author: siren
Date: Thu May  4 09:37:25 2006
New Revision: 399758

URL: http://svn.apache.org/viewcvs?rev=399758&view=rev
Log:
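fixed cached page: moved the cached-content lookup out of the cached.jsp scriptlet into CachedController, rewrote cached.jsp with c:/fmt: tags, added the cached.notHtml message, and registered the controller for cachedPage in tiles-defs.xml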

Modified:
    lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java
    lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
    lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp
    lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml

Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java Thu May  4 09:37:25 2006
@@ -16,16 +16,17 @@
 package org.apache.nutch.webapp.controller;
 
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 
 import javax.servlet.ServletContext;
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.searcher.Hit;
 import org.apache.nutch.searcher.HitDetails;
 import org.apache.nutch.searcher.NutchBean;
-import org.apache.nutch.searcher.Query;
 import org.apache.nutch.webapp.common.ServiceLocator;
 import org.apache.struts.tiles.ComponentContext;
 
@@ -34,17 +35,56 @@
   public void nutchPerform(ComponentContext tileContext,
       HttpServletRequest request, HttpServletResponse response,
       ServletContext servletContext) throws ServletException, IOException {
+    
+    
     ServiceLocator locator = getServiceLocator(request);
     NutchBean bean = locator.getNutchBean();
 
-    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
-        .parseInt(request.getParameter("id")));
+    LOG.info("Cache request from " + request.getRemoteAddr());
+    
+    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
+                      Integer.parseInt(request.getParameter("id")));
+    
     HitDetails details = bean.getDetails(hit);
-    Query query = Query.parse(request.getParameter("query"), locator
-        .getConfiguration());
+    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
 
-    request.setAttribute("explanation", bean.getExplanation(query, hit));
-    request.setAttribute("hitDetails", details);
-    logRequestAttributes(request);
+    Metadata metaData = bean.getParseData(details).getContentMeta();
+
+    String content = null;
+    String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
+
+    
+    if (contentType.startsWith("text/html")) {
+      // FIXME : it's better to emit the original 'byte' sequence 
+      // with 'charset' set to the value of 'CharEncoding',
+      // but I don't know how to emit 'byte sequence' in JSP.
+      // out.getOutputStream().write(bean.getContent(details)) may work, 
+      // but I'm not sure.
+      String encoding = (String) metaData.get("CharEncodingForConversion"); 
+      if (encoding != null) {
+        try {
+          content = new String(bean.getContent(details), encoding);
+        }
+        catch (UnsupportedEncodingException e) {
+          //fallback to configured charset
+          content = new String(bean.getContent(details), locator.getConfiguration().get("parser.character.encoding.default"));
+        }
+      }
+      else {
+        //construct String with system default encoding
+        content = new String(bean.getContent(details));
+      }
+    }
+
+    // page content
+    request.setAttribute("content", content);
+    // page content type
+    request.setAttribute("contentType", contentType);
+    // page url
+    request.setAttribute("url", details.getValue("url"));
+    // page id
+    request.setAttribute("id", id);
+    // flag: true when the cached content type is text/html
+    request.setAttribute("isHtml", new Boolean(contentType.startsWith("text/html")));
   }
 }

Modified: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties Thu May  4 09:37:25 2006
@@ -14,6 +14,7 @@
 cached.title=nutch cache
 cached.page=page: <a href="{0}">{0}</a>
 cached.noContent=Sorry, no content is cached for this page.
+cached.notHtml=The cached content has mime type {0}, click this <a href="servlet/cached?{1}">link</a> to download it directly.
 
 #explain page title
 explain.title=score explanation

Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp Thu May  4 09:37:25 2006
@@ -1,67 +1,28 @@
-<%@ page
-  session="false"
-  contentType="text/html; charset=UTF-8"
-  import="java.io.*"
-  import="java.util.*"
-
-  import="org.apache.nutch.searcher.*"
-  import="org.apache.nutch.parse.ParseData"
-  import="org.apache.nutch.metadata.Metadata"
-  import="org.apache.hadoop.conf.Configuration"
-  import="org.apache.nutch.util.NutchConfiguration"
-%><%@ taglib prefix="bean" uri="/tags/struts-bean" %><%
-  Configuration nutchConf = (Configuration) application.getAttribute(Configuration.class.getName());
-  if (nutchConf == null) {
-    nutchConf = NutchConfiguration.create();
-    application.setAttribute(Configuration.class.getName(), nutchConf);
-  }
-  NutchBean bean = NutchBean.get(application, nutchConf);
-  bean.LOG.info("cache request from " + request.getRemoteAddr());
-  Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
-                    Integer.parseInt(request.getParameter("id")));
-  HitDetails details = bean.getDetails(hit);
-  String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
-
-  Metadata metaData = bean.getParseData(details).getContentMeta();
-
-  String content = null;
-  String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
-  if (contentType.startsWith("text/html")) {
-    // FIXME : it's better to emit the original 'byte' sequence 
-    // with 'charset' set to the value of 'CharEncoding',
-    // but I don't know how to emit 'byte sequence' in JSP.
-    // out.getOutputStream().write(bean.getContent(details)) may work, 
-    // but I'm not sure.
-    String encoding = (String) metaData.get("CharEncodingForConversion"); 
-    if (encoding != null) {
-      try {
-        content = new String(bean.getContent(details), encoding);
-      }
-      catch (UnsupportedEncodingException e) {
-        // fallback to windows-1252
-        content = new String(bean.getContent(details), "windows-1252");
-      }
-    }
-    else 
-      content = new String(bean.getContent(details));
-  }
-%>
-<base href="<%=details.getValue("url")%>">
+<%@ include file="common.jsp"%>
+<c:if test="${isHtml}">
+  <base href="<c:out value="${url}"/>">
+</c:if>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<h2 style="{color: rgb(255, 153, 0)}"><bean:message key="cached.title"/></h2>
-<h3><bean:message key="cached.page" arg0="<%=details.getValue("url")%>" /></h3>
-<hr>
-<% if (contentType.startsWith("text/html")) {%>
-
-<% if (content != null && !content.equals("")) {%>
-<%= content %>
-<% } else { %>
-<bean:message key="cached.noContent"/>
-<% } %>
-
-<% } else { %>
-
-The cached content has mime type "<%=contentType%>",
-click this <a href="servlet/cached?<%=id%>">link</a> to download it directly.
-
-<% } %>
+<h2 style="{color: rgb(255, 153, 0)}"><fmt:message key="cached.title" /></h2>
+<h3><fmt:message key="cached.page">
+	<fmt:param value="${url}" />
+</fmt:message></h3>
+<hr />
+<c:choose>
+	<c:when test="${isHtml}">
+		<c:choose>
+			<c:when test="${content!=null && content!=''}">
+				<c:out value="${content}" escapeXml="false"/>
+			</c:when>
+			<c:otherwise>
+				<fmt:message key="cached.noContent" />
+			</c:otherwise>
+		</c:choose>
+	</c:when>
+	<c:otherwise>
+		<fmt:message key="cached.notHtml">
+			<fmt:param value="${contentType}" />
+			<fmt:param value="${id}" />
+		</fmt:message>
+	</c:otherwise>
+</c:choose>

Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml?rev=399758&r1=399757&r2=399758&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml Thu May  4 09:37:25 2006
@@ -17,7 +17,8 @@
 		<put name="pageBody" value="/WEB-INF/jsp/search.jsp" />
 	</definition>
 	<!-- Cached -->
-	<definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp">
+	<definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp"
+	controllerClass="org.apache.nutch.webapp.controller.CachedController">
 		<put name="title" value="cached.title" />
 	</definition>
 	<!-- Explain -->