You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2006/06/19 19:30:14 UTC
svn commit: r415379 - in /lucene/nutch/trunk/contrib/web2:
plugins/web-caching-oscache/src/java/org/apache/nutch/cache/
plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/
plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controlle...
Author: siren
Date: Mon Jun 19 10:30:13 2006
New Revision: 415379
URL: http://svn.apache.org/viewvc?rev=415379&view=rev
Log:
fixed caching to store entries to disk as promised
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/
lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java
Modified:
lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java
lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
Added: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java?rev=415379&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java Mon Jun 19 10:30:13 2006
@@ -0,0 +1,12 @@
+package org.apache.nutch.cache;
+
+import com.opensymphony.oscache.plugins.diskpersistence.AbstractDiskPersistenceListener;
+
+/**
+ * Disk persistence listener that uses the string handed to
+ * getCacheFileName as the cache file name without modifying it.
+ */
+public class CustomDiskPersistenceListener extends
+ AbstractDiskPersistenceListener {
+
+ /**
+ * Returns the characters of the given string unchanged; no hashing or
+ * escaping is applied here.
+ * NOTE(review): presumably arg0 is the cache key and must therefore
+ * already be usable as a file system path -- confirm against the callers.
+ *
+ * @param arg0 the string to derive the cache file name from
+ * @return the characters of arg0
+ */
+ protected char[] getCacheFileName(String arg0) {
+ return arg0.toCharArray();
+ }
+
+}
Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java (original)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java Mon Jun 19 10:30:13 2006
@@ -15,21 +15,58 @@
*/
package org.apache.nutch.webapp;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Properties;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.webapp.common.Search;
+import org.apache.nutch.webapp.common.ServiceLocator;
import com.opensymphony.oscache.base.Cache;
import com.opensymphony.oscache.base.CacheEntry;
import com.opensymphony.oscache.base.EntryRefreshPolicy;
import com.opensymphony.oscache.base.NeedsRefreshException;
+import com.opensymphony.oscache.general.GeneralCacheAdministrator;
/**
- * CacheManager for
+ * This class is responsible for configuring the used cache and
+ * delivering cached Search objects.
+ *
+ * Configuration parameters can be overridden with the default nutch
+ * configuration mechanism.
+ *
+ * Search Objects are compressed for smaller space requirements.
*/
public class CacheManager {
+
+ /**
+ * Serializable holder for a byte array. putSearch stores the gzip
+ * compressed form of a Search in it and getSearch reads it back.
+ */
+ public static class ByteBufferWrapper implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+ // the wrapped bytes; kept by reference, not copied
+ byte[] contents;
+
+ /**
+ * @param contents the bytes to wrap (stored as is, no defensive copy)
+ */
+ public ByteBufferWrapper(final byte[] contents){
+ this.contents=contents;
+ }
+
+ /**
+ * @return the wrapped byte array itself, not a copy
+ */
+ public byte[] getContents(){
+ return contents;
+ }
+
+ }
- static final String CACHE_KEY="cache";
+ static final Log LOG=LogFactory.getLog(CacheManager.class);
+ static final String CACHE_KEY=CacheManager.class.getName();
+
class NutchRefreshPolicy implements EntryRefreshPolicy {
private static final long serialVersionUID = 1L;
@@ -41,17 +78,39 @@
EntryRefreshPolicy policy=new NutchRefreshPolicy();
+
Cache cache;
+ GeneralCacheAdministrator cacheadmin;
- protected CacheManager(){
- cache=new Cache(true,true,false,true,"com.opensymphony.oscache.base.algorithm.UnlimitedCache",Integer.MAX_VALUE);
+ /**
+ * Builds the cache properties from the given configuration and creates
+ * the cache through a GeneralCacheAdministrator.
+ *
+ * Keys read, with the fallback used when a key is not configured:
+ * cache.memory (false), cache.persistence.class
+ * (org.apache.nutch.cache.CustomDiskPersistenceListener),
+ * cache.path (.) and cache.capacity (1000).
+ *
+ * @param conf configuration to read the cache settings from
+ */
+ protected CacheManager(Configuration conf){
+
+ Properties p=new Properties();
+
+ //use memory for caching
+ boolean cacheMemory=conf.getBoolean("cache.memory", false);
+ p.setProperty("cache.memory", Boolean.toString(cacheMemory));
+
+ //the persistence class used
+ String cachePersistenceClass=conf.get("cache.persistence.class","org.apache.nutch.cache.CustomDiskPersistenceListener");
+ p.setProperty("cache.persistence.class", cachePersistenceClass);
+
+ //where to store cache files (if file cache used)
+ String cachePath=conf.get("cache.path", ".");
+ p.setProperty("cache.path", cachePath);
+
+ //capacity of cache (how many entries)
+ int cacheCapacity=conf.getInt("cache.capacity", 1000);
+ p.setProperty("cache.capacity", Integer.toString(cacheCapacity));
+
+ // the administrator is configured only from the properties built above
+ cacheadmin=new GeneralCacheAdministrator(p);
+ cache=cacheadmin.getCache();
 }
- public static CacheManager getInstance(Configuration conf){
+ public synchronized static CacheManager getInstance(Configuration conf){
CacheManager cache=(CacheManager)conf.getObject(CACHE_KEY);
if(cache==null) {
- cache = new CacheManager();
+ cache = new CacheManager(conf);
conf.setObject(CACHE_KEY, cache);
}
@@ -64,8 +123,33 @@
* @return
* @throws NeedsRefreshException
*/
- public Search getSearch(String id) throws NeedsRefreshException {
- return (Search) cache.getFromCache(id);
+ public Search getSearch(String id, ServiceLocator locator) throws NeedsRefreshException {
+ // Stays null when nothing usable is cached under id: either the cache
+ // returned no wrapper or the stored bytes could not be decoded.
+ Search search=null;
+
+ ByteBufferWrapper w=(ByteBufferWrapper)cache.getFromCache(id);
+ if(w!=null){
+
+ DataInputStream dis = null;
+ try {
+ long time=System.currentTimeMillis();
+ ByteArrayInputStream is=new ByteArrayInputStream(w.getContents());
+ GZIPInputStream gs = new GZIPInputStream(is);
+ dis = new DataInputStream(gs);
+
+ // Decode into a local first so that a failure half way through
+ // readFields never hands a partially populated Search to the caller.
+ Search decoded = new Search(locator);
+ decoded.readFields(dis);
+ long delta=System.currentTimeMillis()-time;
+
+ if(LOG.isDebugEnabled()){
+ LOG.debug("Decompressing cache entry took: " + delta + "ms.");
+ }
+
+ decoded.init();
+ search = decoded;
+ } catch (IOException e) {
+ LOG.info("Could not get cached object: " + e);
+ } finally {
+ // Closing the outermost stream closes the gzip stream as well and
+ // releases its inflater right away instead of at finalization.
+ if(dis!=null){
+ try {
+ dis.close();
+ } catch (IOException ignored) {
+ // in-memory source, nothing left to recover on close
+ }
+ }
+ }
+ }
+ return search;
 }
/**
@@ -75,7 +159,28 @@
* @param search the search to cache
*/
public void putSearch(String id, Search search){
- cache.putInCache(id,search,policy);
+ try {
+ long time=System.currentTimeMillis();
+ ByteArrayOutputStream bos=new ByteArrayOutputStream();
+ GZIPOutputStream gzos=new GZIPOutputStream(bos);
+ DataOutputStream oos=new DataOutputStream(gzos);
+ search.write(oos);
+ oos.flush();
+ oos.close();
+ gzos.close();
+ long delta=System.currentTimeMillis()-time;
+ ByteBufferWrapper wrap=new ByteBufferWrapper(bos.toByteArray());
+ if(LOG.isDebugEnabled()){
+ LOG.debug("Compressing cache entry took: " + delta + "ms.");
+ LOG.debug("size: " + wrap.getContents().length + " bytes");
+ }
+ cache.putInCache(id, wrap);
+ } catch (IOException e) {
+ LOG.info("cannot store object in cache: " + e);
+ }
}
+ /**
+ * Cancels a pending update of the given cache key by delegating to
+ * the underlying cache.
+ * NOTE(review): presumably meant to be called after getSearch threw a
+ * NeedsRefreshException and no new entry will be put -- confirm against
+ * the OSCache documentation.
+ *
+ * @param key the cache key whose update is cancelled
+ */
+ public void cancelUpdate(String key) {
+ cache.cancelUpdate(key);
+ }
}
Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java (original)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java Mon Jun 19 10:30:13 2006
@@ -25,6 +25,7 @@
import org.apache.nutch.webapp.CacheManager;
import org.apache.nutch.webapp.common.Search;
import org.apache.nutch.webapp.common.ServiceLocator;
+import org.apache.nutch.webapp.common.Startable;
import org.apache.nutch.webapp.controller.SearchController;
import org.apache.struts.tiles.ComponentContext;
@@ -34,37 +35,51 @@
* This naive search result caching implementation is just an example of
* extending the web ui.
*/
-public class CachingSearchController extends SearchController {
+public class CachingSearchController extends SearchController implements Startable {
+
+ CacheManager manager=null;
public void nutchPerform(ComponentContext tileContext,
HttpServletRequest request, HttpServletResponse response,
ServletContext servletContext) throws ServletException, IOException {
- Search search = null;
- boolean requiresUpdate = false;
-
- // key used for caching
- String key = request.getQueryString();
-
ServiceLocator locator = getServiceLocator(request);
-
- if (key != null) {
+ Search search;
+
+ // key used for caching results; it should really be something other than a part of a
+ // user-definable String
+ String key = request.getQueryString().replace("?","_").replace("&","_");
+ StringBuffer cacheKey=new StringBuffer(key.length()*2);
+ for(int i=0;i<key.length();i++){
+ cacheKey.append(key.charAt(i)).append(java.io.File.separatorChar);
+ }
+
+ if(LOG.isDebugEnabled()){
+ LOG.debug("cache key:" + cacheKey);
+ }
+ if (cacheKey != null) {
try {
- search = CacheManager.getInstance(locator.getConfiguration())
- .getSearch(key);
+ search = manager.getSearch(cacheKey.toString(), locator);
request.setAttribute(Search.REQ_ATTR_SEARCH, search);
- LOG.info("Using cached");
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("Using cached");
+ }
} catch (NeedsRefreshException e) {
- requiresUpdate = true;
- LOG.info("Cache update required");
+ try{
+ super.nutchPerform(tileContext, request, response, servletContext);
+ search = (Search) locator.getSearch();
+ manager.putSearch(cacheKey.toString(),
+ search);
+ } catch (Exception ex){
+ LOG.info("Cancelling update");
+ manager.cancelUpdate(cacheKey.toString());
+ }
}
}
- if (key!=null && (search == null || requiresUpdate)) {
- LOG.info("Cache miss");
- super.nutchPerform(tileContext, request, response, servletContext);
- search = (Search) locator.getSearch();
- CacheManager.getInstance(locator.getConfiguration()).putSearch(key,
- search);
- }
+ }
+
+ /**
+ * Obtains the CacheManager for the configuration of the ServiceLocator
+ * looked up from the given servlet context and keeps it in the manager
+ * field, which nutchPerform uses (without a null check) for cache
+ * lookups and updates.
+ *
+ * @param servletContext context used to look up the ServiceLocator
+ */
+ public void start(ServletContext servletContext) {
+ ServiceLocator locator=getServiceLocator(servletContext);
+ manager=CacheManager.getInstance(locator.getConfiguration());
 }
}
Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Mon Jun 19 10:30:13 2006
@@ -15,12 +15,15 @@
*/
package org.apache.nutch.webapp.common;
+import java.io.DataInput;
+import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Writable;
import org.apache.nutch.html.Entities;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
@@ -35,7 +38,10 @@
* results) might be a good candidate for caching ?
*
*/
-public class Search {
+public class Search implements Writable {
+
+ private static final long serialVersionUID = 1L;
+
public static final String REQ_ATTR_SEARCH="nutchSearch";
public static final Log LOG = LogFactory.getLog(Search.class);
@@ -104,18 +110,8 @@
int realEnd = (int) Math.min(hits.getLength(), getStartOffset()
+ getHitsRequired());
- int endOffset=hits.getLength();
-
+ init();
show = hits.getHits(getStartOffset(), realEnd - getStartOffset());
-
- navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits
- .getTotal(), hits.totalIsExact());
-
- // set offset to next page to form so it get's to ui
- if (navigationHelper.hasNext()) {
- form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper
- .getNextPageStart()));
- }
try {
details = locator.getNutchBean().getDetails(show);
@@ -126,6 +122,20 @@
}
}
+ /**
+ * Builds the navigation helper from the current hits and, if there is a
+ * next page, stores that page's start offset in the search form.
+ *
+ * Reads hits, startOffset, hitsPerPage and form; hits and form are
+ * dereferenced without a null check, so they must be set beforehand.
+ */
+ public void init(){
+ int endOffset=hits.getLength();
+
+ navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits
+ .getTotal(), hits.totalIsExact());
+
+ // set offset to next page to form so it gets to ui
+ if (navigationHelper.hasNext()) {
+ form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper
+ .getNextPageStart()));
+ }
+ }
+
+
/**
* gets the results of search to display
*
@@ -156,6 +166,10 @@
}
return ret;
}
+
+ /**
+ * Creates a Search without a ServiceLocator; the constructor body is
+ * empty, so the locator is not set here.
+ * NOTE(review): presumably added so that a Search can be instantiated
+ * before readFields is called -- confirm.
+ */
+ public Search(){
+
+ }
public Search(ServiceLocator locator) {
this.locator = locator;
@@ -463,5 +477,55 @@
public void launchSearch() {
BaseSearch bs=new BaseSearch(locator);
bs.doSearch();
+ }
+
+ /**
+ * Serializes this search to the given output in the following order:
+ * hits, then for each of show, details and summaries the array length
+ * as an int followed by every element. readFields reads the same layout.
+ *
+ * hits, show, details and summaries are dereferenced without null
+ * checks. An info level message is logged on every call.
+ *
+ * @param out the output to write to
+ * @throws IOException if writing to out fails
+ */
+ public void write(DataOutput out) throws IOException {
+ LOG.info("writing hits");
+ hits.write(out);
+
+
+ // each array is written as its length followed by its elements
+ out.writeInt(show.length);
+
+ for(int i=0;i<show.length;i++){
+ show[i].write(out);
+ }
+
+ out.writeInt(details.length);
+ for(int i=0;i<details.length;i++){
+ details[i].write(out);
+ }
+
+ out.writeInt(summaries.length);
+ for(int i=0;i<summaries.length;i++){
+ summaries[i].write(out);
+ }
+
+ }
+
+ /**
+ * Restores hits, show, details and summaries from the given input,
+ * replacing the current values. The layout read is the one produced by
+ * write: hits first, then for each array its length as an int followed
+ * by the elements. No other field of this object is assigned here.
+ *
+ * @param in the input to read from
+ * @throws IOException if reading from in fails
+ */
+ public void readFields(DataInput in) throws IOException {
+ hits=new Hits();
+ hits.readFields(in);
+ // each array is read as its length followed by its elements
+ int showlength=in.readInt();
+ show=new Hit[showlength];
+ for(int i=0;i<showlength;i++){
+ show[i]=new Hit();
+ show[i].readFields(in);
+ }
+
+ int detailsLength=in.readInt();
+ details=new HitDetails[detailsLength];
+ for(int i=0;i<detailsLength;i++){
+ details[i]=new HitDetails();
+ details[i].readFields(in);
+ }
+
+ int summariesLength=in.readInt();
+ summaries=new Summary[summariesLength];
+ for(int i=0;i<summariesLength;i++){
+ summaries[i]=new Summary();
+ summaries[i].readFields(in);
+ }
+
+
 }
}