You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2006/06/19 19:30:14 UTC

svn commit: r415379 - in /lucene/nutch/trunk/contrib/web2: plugins/web-caching-oscache/src/java/org/apache/nutch/cache/ plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/ plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controlle...

Author: siren
Date: Mon Jun 19 10:30:13 2006
New Revision: 415379

URL: http://svn.apache.org/viewvc?rev=415379&view=rev
Log:
fixed caching to store entries to disk as promised

Added:
    lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/
    lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java
Modified:
    lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java
    lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java
    lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java

Added: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java?rev=415379&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java Mon Jun 19 10:30:13 2006
@@ -0,0 +1,12 @@
+package org.apache.nutch.cache;
+
+import com.opensymphony.oscache.plugins.diskpersistence.AbstractDiskPersistenceListener;
+
+public class CustomDiskPersistenceListener extends
+    AbstractDiskPersistenceListener { // disk persistence listener with its own cache file naming
+
+  protected char[] getCacheFileName(String arg0) { // arg0 is presumably the cache key - confirm against OSCache
+    return arg0.toCharArray(); // the string is used verbatim: no hashing or escaping is applied here
+  }
+
+}

Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java (original)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java Mon Jun 19 10:30:13 2006
@@ -15,21 +15,58 @@
  */
 package org.apache.nutch.webapp;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Properties;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.webapp.common.Search;
+import org.apache.nutch.webapp.common.ServiceLocator;
 
 import com.opensymphony.oscache.base.Cache;
 import com.opensymphony.oscache.base.CacheEntry;
 import com.opensymphony.oscache.base.EntryRefreshPolicy;
 import com.opensymphony.oscache.base.NeedsRefreshException;
+import com.opensymphony.oscache.general.GeneralCacheAdministrator;
 
 /**
- * CacheManager for 
+ * This class is responsible for configuring the used cache and
+ * delivering cached Search objects.
+ * 
+ * Configuration parameters can be overridden with the default nutch
+ * configuration mechanism.
+ * 
+ * Search Objects are compressed for smaller space requirements.
  */
 public class CacheManager {
+  
+  public static class ByteBufferWrapper implements Serializable { // Serializable holder for a raw byte array
+    
+    private static final long serialVersionUID = 1L;
+    byte[] contents; // kept by reference; the array is not copied
+    
+    public ByteBufferWrapper(final byte[] contents){
+      this.contents=contents;
+    }
+    
+    public byte[] getContents(){ // returns the wrapped array itself (no defensive copy)
+      return contents;
+    }
+    
+  }
 
-  static final String CACHE_KEY="cache";
+  static final Log LOG=LogFactory.getLog(CacheManager.class);
   
+  static final String CACHE_KEY=CacheManager.class.getName();
+
   class NutchRefreshPolicy implements EntryRefreshPolicy {
 
     private static final long serialVersionUID = 1L;
@@ -41,17 +78,39 @@
   
   EntryRefreshPolicy policy=new NutchRefreshPolicy();
 
+
   Cache cache;
+  GeneralCacheAdministrator cacheadmin;
   
-  protected CacheManager(){
-    cache=new Cache(true,true,false,true,"com.opensymphony.oscache.base.algorithm.UnlimitedCache",Integer.MAX_VALUE);
+  protected CacheManager(Configuration conf){ // builds the cache from values read out of the nutch configuration
+    
+    Properties p=new Properties(); // settings handed to GeneralCacheAdministrator below
+    
+    //use memory for caching (defaults to false)
+    boolean cacheMemory=conf.getBoolean("cache.memory", false);
+    p.setProperty("cache.memory", Boolean.toString(cacheMemory));
+    
+    //the persistence class used
+    String cachePersistenceClass=conf.get("cache.persistence.class","org.apache.nutch.cache.CustomDiskPersistenceListener");
+    p.setProperty("cache.persistence.class", cachePersistenceClass);
+
+    //where to store cache files (if file cache used); defaults to "."
+    String cachePath=conf.get("cache.path", ".");
+    p.setProperty("cache.path", cachePath);
+
+    //capacity of cache (how many entries); defaults to 1000
+    int cacheCapacity=conf.getInt("cache.capacity", 1000);
+    p.setProperty("cache.capacity", Integer.toString(cacheCapacity));
+    
+    cacheadmin=new GeneralCacheAdministrator(p);
+    cache=cacheadmin.getCache(); // the Cache used by getSearch, putSearch and cancelUpdate
   }
   
-  public static CacheManager getInstance(Configuration conf){
+  public synchronized static CacheManager getInstance(Configuration conf){
     CacheManager cache=(CacheManager)conf.getObject(CACHE_KEY);
     
     if(cache==null) {
-      cache = new CacheManager();
+      cache = new CacheManager(conf);
       
       conf.setObject(CACHE_KEY, cache);
     }
@@ -64,8 +123,33 @@
    * @return
    * @throws NeedsRefreshException
    */
-  public Search getSearch(String id) throws NeedsRefreshException  {
-    return (Search) cache.getFromCache(id);
+  public Search getSearch(String id, ServiceLocator locator) throws NeedsRefreshException  {
+    ByteBufferWrapper w=(ByteBufferWrapper)cache.getFromCache(id);
+    if(w==null){
+      return null; // nothing usable stored under this id
+    }
+    try {
+      long time=System.currentTimeMillis();
+      DataInputStream dis = new DataInputStream(
+          new GZIPInputStream(new ByteArrayInputStream(w.getContents())));
+      try {
+        // decode into a local: a half-read Search must never reach the caller
+        Search search = new Search(locator);
+        search.readFields(dis);
+        long delta=System.currentTimeMillis()-time;
+        if(LOG.isDebugEnabled()){
+          LOG.debug("Decompressing cache entry took: " + delta + "ms.");
+        }
+        search.init(); // rebuild the state that is derived from the hits and not serialized
+        return search;
+      } finally {
+        // releases the inflater held by the gzip stream on success and on failure
+        dis.close();
+      }
+    } catch (IOException e) {
+      LOG.info("Could not get cached object: " + e, e); // pass e so the stack trace is kept
+      return null; // undecodable entry: report "no search" instead of a broken one
+    }
+  }
 
   /**
@@ -75,7 +159,28 @@
    * @param search the search to cache
    */
   public void putSearch(String id, Search search){
-    cache.putInCache(id,search,policy);
+    try {
+      long time=System.currentTimeMillis();
+      ByteArrayOutputStream bos=new ByteArrayOutputStream();
+      DataOutputStream oos=new DataOutputStream(new GZIPOutputStream(bos));
+      try {
+        search.write(oos); // Search serializes itself; the bytes are gzip-compressed into bos
+      } finally {
+        oos.close(); // finishes the gzip stream and frees its deflater even if write() fails
+      }
+      long delta=System.currentTimeMillis()-time;
+      ByteBufferWrapper wrap=new ByteBufferWrapper(bos.toByteArray());
+      if(LOG.isDebugEnabled()){
+        LOG.debug("Compressing cache entry took: " + delta + "ms.");
+        LOG.debug("size: " + wrap.getContents().length + " bytes");
+      }
+      cache.putInCache(id, wrap);
+    } catch (IOException e) { // NOTE(review): a pending cache update for id is not cancelled here - confirm
+      LOG.info("cannot store object in cache: " + e, e); // pass e so the stack trace is kept
+    }
   }
 
+  public void cancelUpdate(String key) { // used by the caching controller when refreshing an entry failed
+    cache.cancelUpdate(key); // plain delegation to the underlying Cache
+  }
 }

Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java (original)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java Mon Jun 19 10:30:13 2006
@@ -25,6 +25,7 @@
 import org.apache.nutch.webapp.CacheManager;
 import org.apache.nutch.webapp.common.Search;
 import org.apache.nutch.webapp.common.ServiceLocator;
+import org.apache.nutch.webapp.common.Startable;
 import org.apache.nutch.webapp.controller.SearchController;
 import org.apache.struts.tiles.ComponentContext;
 
@@ -34,37 +35,51 @@
  * This naive search result caching implementation is just an example of
  * extending the web ui.
  */
-public class CachingSearchController extends SearchController {
+public class CachingSearchController extends SearchController implements Startable {
+
+  CacheManager manager=null;
 
   public void nutchPerform(ComponentContext tileContext,
       HttpServletRequest request, HttpServletResponse response,
       ServletContext servletContext) throws ServletException, IOException {
 
-    Search search = null;
-    boolean requiresUpdate = false;
-
-    // key used for caching
-    String key = request.getQueryString();
-
     ServiceLocator locator = getServiceLocator(request);
-
-    if (key != null) {
+    Search search;
+    
+    // key used for caching results; it should really be something other than part of a
+    // user-definable String
+    String key = request.getQueryString().replace("?","_").replace("&","_"); // NOTE(review): no null check on getQueryString() - confirm a query string is always present
+    StringBuffer cacheKey=new StringBuffer(key.length()*2);
+    for(int i=0;i<key.length();i++){
+      cacheKey.append(key.charAt(i)).append(java.io.File.separatorChar); // a file separator follows every key character
+    }
+    
+    if(LOG.isDebugEnabled()){
+      LOG.debug("cache key:" + cacheKey);
+    }
+    if (cacheKey != null) { // NOTE(review): cacheKey was just created with new, so this test is always true
       try {
-        search = CacheManager.getInstance(locator.getConfiguration())
-            .getSearch(key);
+        search = manager.getSearch(cacheKey.toString(), locator); // manager is assigned in start()
         request.setAttribute(Search.REQ_ATTR_SEARCH, search);
-        LOG.info("Using cached");
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("Using cached");
+        }
       } catch (NeedsRefreshException e) {
-        requiresUpdate = true;
-        LOG.info("Cache update required");
+        try{ // the cache asked for a refresh: run the real search and store its result
+          super.nutchPerform(tileContext, request, response, servletContext);
+          search = (Search) locator.getSearch();
+          manager.putSearch(cacheKey.toString(),
+            search);
+        } catch (Exception ex){ // NOTE(review): ex is neither logged nor rethrown - confirm this is intended
+          LOG.info("Cancelling update");
+          manager.cancelUpdate(cacheKey.toString()); // the search or the store failed: cancel the update for this key
+        }
       }
     }
-    if (key!=null && (search == null || requiresUpdate)) {
-      LOG.info("Cache miss");
-      super.nutchPerform(tileContext, request, response, servletContext);
-      search = (Search) locator.getSearch();
-      CacheManager.getInstance(locator.getConfiguration()).putSearch(key,
-          search);
-    }
+  }
+
+  public void start(ServletContext servletContext) { // presumably the Startable callback - confirm; looks up the CacheManager once
+    ServiceLocator locator=getServiceLocator(servletContext);
+    manager=CacheManager.getInstance(locator.getConfiguration()); // stored in the field used by nutchPerform
   }
 }

Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=415379&r1=415378&r2=415379&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java (original)
+++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Mon Jun 19 10:30:13 2006
@@ -15,12 +15,15 @@
  */
 package org.apache.nutch.webapp.common;
 
+import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Writable;
 import org.apache.nutch.html.Entities;
 import org.apache.nutch.searcher.Hit;
 import org.apache.nutch.searcher.HitDetails;
@@ -35,7 +38,10 @@
  * results) might be a good candidate for caching ?
  * 
  */
-public class Search {
+public class Search implements Writable {
+
+  private static final long serialVersionUID = 1L;
+  
   public static final String REQ_ATTR_SEARCH="nutchSearch";
   public static final Log LOG = LogFactory.getLog(Search.class);
 
@@ -104,18 +110,8 @@
     int realEnd = (int) Math.min(hits.getLength(), getStartOffset()
         + getHitsRequired());
 
-    int endOffset=hits.getLength();
-    
+    init();
     show = hits.getHits(getStartOffset(), realEnd - getStartOffset());
-    
-    navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits
-        .getTotal(), hits.totalIsExact());
-
-    // set offset to next page to form so it get's to ui
-    if (navigationHelper.hasNext()) {
-      form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper
-          .getNextPageStart()));
-    }
 
     try {
       details = locator.getNutchBean().getDetails(show);
@@ -126,6 +122,20 @@
     }
   }
 
+  public void init(){ // (re)builds the navigation helper from hits; CacheManager also calls this after readFields
+    int endOffset=hits.getLength();
+    
+    navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits
+        .getTotal(), hits.totalIsExact());
+
+    // set the offset of the next page on the form so it gets to the ui
+    if (navigationHelper.hasNext()) {
+      form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper
+          .getNextPageStart()));
+    }
+  }
+  
+  
   /**
    * gets the results of search to display
    * 
@@ -156,6 +166,10 @@
     }
     return ret;
   }
+  
+  public Search(){ // no-arg constructor with an empty body; unlike Search(ServiceLocator) it does not set locator
+    
+  }
 
   public Search(ServiceLocator locator) {
     this.locator = locator;
@@ -463,5 +477,55 @@
   public void launchSearch() {
     BaseSearch bs=new BaseSearch(locator);
     bs.doSearch();
+  }
+
+  public void write(DataOutput out) throws IOException { // layout: hits, then show, details and summaries, each as count + entries
+    LOG.info("writing hits"); // NOTE(review): logged at info level on every call, without a level guard
+    hits.write(out);
+    
+    
+    out.writeInt(show.length); // element count first, so readFields can size the array
+
+    for(int i=0;i<show.length;i++){
+      show[i].write(out);
+    }
+
+    out.writeInt(details.length);
+    for(int i=0;i<details.length;i++){
+      details[i].write(out);
+    }
+
+    out.writeInt(summaries.length);
+    for(int i=0;i<summaries.length;i++){
+      summaries[i].write(out);
+    }
+
+  }
+
+  public void readFields(DataInput in) throws IOException { // reads back what write() produced, in the same order
+    hits=new Hits();
+    hits.readFields(in);
+    int showlength=in.readInt(); // each array is preceded by its element count
+    show=new Hit[showlength];
+    for(int i=0;i<showlength;i++){
+      show[i]=new Hit();
+      show[i].readFields(in);
+    }
+
+    int detailsLength=in.readInt();
+    details=new HitDetails[detailsLength];
+    for(int i=0;i<detailsLength;i++){
+      details[i]=new HitDetails();
+      details[i].readFields(in);
+    }
+
+    int summariesLength=in.readInt();
+    summaries=new Summary[summariesLength];
+    for(int i=0;i<summariesLength;i++){
+      summaries[i]=new Summary();
+      summaries[i].readFields(in);
+    }
+
+  
   }
 }