You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mod_python-commits@quetz.apache.org by nl...@apache.org on 2005/04/27 17:53:36 UTC

svn commit: r164998 - in /httpd/mod_python/trunk/lib/python/mod_python: cache.py publisher.py

Author: nlehuen
Date: Wed Apr 27 08:53:36 2005
New Revision: 164998

URL: http://svn.apache.org/viewcvs?rev=164998&view=rev
Log:
Tentative fix for MODPYTHON-8 and MODPYTHON-9.

Added:
    httpd/mod_python/trunk/lib/python/mod_python/cache.py   (with props)
Modified:
    httpd/mod_python/trunk/lib/python/mod_python/publisher.py

Added: httpd/mod_python/trunk/lib/python/mod_python/cache.py
URL: http://svn.apache.org/viewcvs/httpd/mod_python/trunk/lib/python/mod_python/cache.py?rev=164998&view=auto
==============================================================================
--- httpd/mod_python/trunk/lib/python/mod_python/cache.py (added)
+++ httpd/mod_python/trunk/lib/python/mod_python/cache.py Wed Apr 27 08:53:36 2005
@@ -0,0 +1,407 @@
+ #
+ # Copyright 2004 Apache Software Foundation 
+ # 
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
+ # may not use this file except in compliance with the License.  You
+ # may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ # implied.  See the License for the specific language governing
+ # permissions and limitations under the License.
+ #
+ # Originally developed by Gregory Trubetskoy.
+ # 
+ # This was donated by Nicolas Lehuen, and also posted to the Python Cookbook
+ # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302997
+ # 
+ # $Id$
+
+# -*- coding: CP1252 -*-
+from threading import Lock
+from os import fstat
+from time import time,strptime
+from calendar import timegm
+import urllib2
+import re
+import weakref
+
+NOT_INITIALIZED = object()
+
+class Entry(object):
+    """ A cache entry, mostly an internal object. """
+    def __init__(self,key):
+        object.__init__(self)
+        self._key=key
+        self._value=NOT_INITIALIZED
+        self._lock=Lock()
+
+class Cache(object):
+    """ An abstract, multi-threaded cache object. """
+    
+    def __init__(self,max_size=0):
+        """ Builds a cache with a limit of max_size entries.
+            If this limit is exceeded, the Least Recently Used entry is discarded.
+            if max_size==0, the cache is unbounded (no LRU rule is applied).
+        """
+        object.__init__(self)
+        self._maxsize=max_size
+        self._dict={}
+        self._lock=Lock()
+        
+        # Header of the access list
+        if self._maxsize:
+            self._head=Entry(None)
+            self._head._previous=self._head
+            self._head._next=self._head
+
+    def __setitem__(self,name,value):
+        """ Populates the cache with a given name and value. """
+        self._lock.acquire()
+        try:
+            key = self.key(name)
+            entry = self._dict.get(key)
+            if not entry:
+                entry = Entry(key)
+                self._pack(entry,value)
+                self._dict[key]=entry
+                if self._maxsize:
+                    entry._next = entry._previous = None
+                    self._access(entry)
+                    self._checklru()
+            else:
+                self._pack(entry,value)
+                if self._maxsize:
+                    self._access(entry)
+            self.commit()
+        finally:
+            self._lock.release()
+
+    def __getitem__(self,name):
+        """ Gets a value from the cache, builds it if required. """
+        self._lock.acquire()
+        try:
+            key = self.key(name)
+            entry = self._dict.get(key)
+            if not entry:
+                entry = Entry(key)
+                self._dict[key]=entry
+                if self._maxsize:
+                    entry._next = entry._previous = None
+                    self._access(entry)
+                    self._checklru()
+            elif self._maxsize:
+                self._access(entry)
+        finally:
+            self._lock.release()
+
+        entry._lock.acquire()
+        try:
+            value = self._unpack(entry)
+            if value is NOT_INITIALIZED:
+                opened = self.check(name,entry)
+                value = self.build(name,opened,entry)
+                self._pack(entry,value)
+                self.commit()
+            else:
+                opened = self.check(name,entry)
+                if opened is not None:
+                    value = self.build(name,opened,entry)
+                    self._pack(entry,value)
+                    self.commit()
+            return value
+        finally:
+            entry._lock.release()
+
+    def __delitem__(self,key):
+        self._lock.acquire()
+        try:
+            key = self.key(key)
+            del self._dict[key]
+        finally:
+            self._lock.release()
+
+    def mru(self):
+        """ Returns the Most Recently Used key """
+        if self._maxsize:
+            self._lock.acquire()
+            try:
+                return self._head._previous._key
+            finally:
+                self._lock.release()
+        else:
+            return None
+
+    def lru(self):
+        """ Returns the Least Recently Used key """
+        if self._maxsize:
+            self._lock.acquire()
+            try:
+                return self._head._next._key
+            finally:
+                self._lock.release()
+        else:
+            return None
+
+    def key(self,name):
+        """ Override this method to extract a key from the name passed to the [] operator """
+        return name
+
+    def commit(self):
+        """ Override this method if you want to do something each time the underlying dictionary is modified (e.g. make it persistent). """
+        pass
+
+    def clear(self):
+        """ Clears the cache """
+        self._lock.acquire()
+        try:
+            self._dict.clear()
+            if self._maxsize:
+                self._head._next=self._head
+                self._head._previous=self._head
+        finally:
+            self._lock.release()
+
+    def check(self,name,entry):
+        """ Override this method to check whether the entry with the given name is stale. Return None if it is fresh
+            or an opened resource if it is stale. The object returned will be passed to the 'build' method as the 'opened' parameter.
+            Use the 'entry' parameter to store meta-data if required. Don't worry about multiple threads accessing the same name,
+            as this method is properly isolated.
+        """
+        return None
+
+    def build(self,name,opened,entry):
+        """ Build the cached value with the given name from the given opened resource. Use entry to obtain or store meta-data if needed.
+             Don't worry about multiple threads accessing the same name, as this method is properly isolated.
+        """
+        raise NotImplementedError()
+           
+    def _access(self,entry):
+        " Internal use only, must be invoked within a cache lock. Updates the access list. """
+        if entry._next is not self._head:
+            if entry._previous is not None:
+                # remove the entry from the access list
+                entry._previous._next=entry._next
+                entry._next._previous=entry._previous
+            # insert the entry at the end of the access list
+            entry._previous=self._head._previous
+            entry._previous._next=entry
+            entry._next=self._head
+            entry._next._previous=entry
+            if self._head._next is self._head:
+                self._head._next=entry
+
+    def _checklru(self):
+        " Internal use only, must be invoked within a cache lock. Removes the LRU entry if needed. """
+        if len(self._dict)>self._maxsize:
+            lru=self._head._next
+            lru._previous._next=lru._next
+            lru._next._previous=lru._previous
+            del self._dict[lru._key]
+
+    def _pack(self,entry,value):
+        """ Store the value in the entry. """
+        entry._value=value
+
+    def _unpack(self,entry):
+        """ Recover the value from the entry, returns NOT_INITIALIZED if it is not OK. """
+        return entry._value
+
+class WeakCache(Cache):
+    """ This cache holds weak references to the values it stores. Whenever a value is not longer
+        normally referenced, it is removed from the cache. Useful for sharing the result of long
+        computations but letting them go as soon as they are not needed by anybody.
+    """
+        
+    def _pack(self,entry,value):
+        entry._value=weakref.ref(value,lambda ref: self.__delitem__(entry._key))
+        
+    def _unpack(self,entry):
+        if entry._value is NOT_INITIALIZED:
+            return NOT_INITIALIZED
+            
+        value = entry._value()
+        if value is None:
+            return NOT_INITIALIZED
+        else:
+            return value
+
+class FileCache(Cache):
+    """ A file cache. Returns the content of the files as a string, given their filename.
+        Whenever the files are modified (according to their modification time) the cache is updated.
+        Override the build method to obtain more interesting behaviour.
+    """
+    def __init__(self,max_size=0,mode='rb'):
+        Cache.__init__(self,max_size)
+        self.mode=mode
+    
+    def check(self,name,entry):
+        """ Checks the modification time to determine whether a file has changed or not. """
+        f = file(name,self.mode)
+        fs = fstat(f.fileno())
+        ts1 = fs[-2]
+        try:
+            ts2 = entry._timestamp
+        except AttributeError:
+            ts2 = ts1-1
+                        
+        if ts2<ts1:
+            entry._timestamp=ts1
+            return f
+        else:
+            return None
+
+    def build(self,name,opened,entry):
+        """ Return the content of the file as a string. Override this for better behaviour. """
+        try:
+            return opened.read()
+        finally:
+            opened.close()
+
+def parseRFC822Time(t):
+    return timegm(strptime(t,"%a, %d %b %Y %H:%M:%S %Z"))
+
+re_max_age=re.compile('max-age\s*=\s*(\d+)',re.I)
+
+class HTTPEntity(object):
+    def __init__(self,entity,metadata):
+        self.entity=entity
+        self.metadata=metadata
+    
+    def __repr__(self):
+        return 'HTTPEntity(%s,%s)'%(repr(self.entity),self.metadata)
+        
+    def __str__(self):
+        return self.entity
+
+class HTTPCache(Cache):
+    """ An HTTP cache. Returns the entity found at the given URL.
+        Uses Expires, ETag and Last-Modified headers to minimize bandwidth usage.
+        Partial Cache-Control support (only max-age is supported).
+    """
+    def check(self,name,entry):
+        request = urllib2.Request(name)
+        
+        try:
+            if time()<entry._expires:
+                return None
+        except AttributeError:
+            pass            
+        try:
+            header, value = entry._validator
+            request.headers[header]=value
+        except AttributeError:
+            pass
+        opened = None
+        try:
+            opened = urllib2.urlopen(request)
+            headers = opened.info()
+
+            # expiration handling            
+            expiration = False
+            try:
+                match = re_max_age.match(headers['cache-control'])
+                if match:
+                        entry._expires=time()+int(match.group(1))
+                        expiration = True
+            except (KeyError,ValueError):
+                pass
+            if not expiration:
+                try:
+                    date = parseRFC822Time(headers['date'])
+                    expires = parseRFC822Time(headers['expires'])
+                    entry._expires = time()+(expires-date)
+                    expiration = True
+                except KeyError:
+                    pass
+            
+            # validator handling
+            validation = False
+            try:
+                entry._validator='If-None-Match',headers['etag']
+                validation = True
+            except KeyError:
+                pass
+            if not validation:
+                try:
+                    entry._validator='If-Modified-Since',headers['last-modified']
+                except KeyError:
+                    pass
+
+            return opened
+        except urllib2.HTTPError, error:
+            if opened: opened.close()
+            if error.code==304:
+                return None
+            else:
+                raise error
+
+    def build(self,name,opened,entry):
+        try:
+            return HTTPEntity(opened.read(),dict(opened.info()))
+        finally:
+            opened.close()
+
+class Module(object):
+    """ Placeholder object for the module definition. """
+    def __init__(self,filename):
+        self.__file__=filename
+    
+    def __repr__(self):
+        return '<%s object at 0x%08x from %s>'%(type(self).__name__,id(self),self.__file__)
+
+class ModuleCache(FileCache):
+    """ A module cache. Give it a file name, it returns a module-like object
+        which results from the execution of the Python script it contains.
+    """
+    def __init__(self,max_size=0):
+        FileCache.__init__(self,max_size,'r')
+    
+    def build(self,name,opened,entry):
+        try:
+            module = Module(name)
+            exec opened in module.__dict__
+            return module
+            # I used to use imp.load_source but right now I'm trying the stuff above
+            # return imp.load_source(re.sub('\W','_',name),name,opened)
+        finally:
+            opened.close()
+
+class HttpModuleCache(HTTPCache):
+    """ A module cache. Give it a file name, it returns a module-like object
+        which results from the execution of the Python script it contains.
+    """
+    def __init__(self,max_size=0):
+        HTTPCache.__init__(self,max_size)
+    
+    def build(self,name,opened,entry):
+        try:
+            module = Module(name)
+            text = opened.read().replace('\r\n','\n')
+            code = compile(text,name,'exec')
+            exec code in module.__dict__
+            return module
+            # I used to use imp.load_source but right now I'm trying the stuff above
+            # return imp.load_source(re.sub('\W','_',name),name,opened)
+        finally:
+            opened.close()
+
+class FunctionCache(Cache):
+    def __init__(self,function,max_size=0):
+        Cache.__init__(self,max_size)
+        self.function=function
+    
+    def __call__(self,*args,**kw):
+        if kw:
+            # a dict is not hashable so we build a tuple of (key,value) pairs
+            kw = tuple(kw.iteritems())
+            return self[args,kw]
+        else:
+            return self[args,()]
+    
+    def build(self,name,opened,entry):
+        args,kw = name
+        return self.function(*args,**dict(kw))

Propchange: httpd/mod_python/trunk/lib/python/mod_python/cache.py
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: httpd/mod_python/trunk/lib/python/mod_python/publisher.py
URL: http://svn.apache.org/viewcvs/httpd/mod_python/trunk/lib/python/mod_python/publisher.py?rev=164998&r1=164997&r2=164998&view=diff
==============================================================================
--- httpd/mod_python/trunk/lib/python/mod_python/publisher.py (original)
+++ httpd/mod_python/trunk/lib/python/mod_python/publisher.py Wed Apr 27 08:53:36 2005
@@ -43,81 +43,93 @@
 
 imp_suffixes = " ".join([x[0][1:] for x in imp.get_suffixes()])
 
+from cache import ModuleCache, NOT_INITIALIZED
+
+class PageCache(ModuleCache):
+    """ This is the cache for page objects. Handles the automatic reloading of pages. """
+    
+    def key(self,req):
+        """ Extracts the filename from the request """
+        return req.filename
+    
+    def check(self,req,entry):
+        config = req.get_config()
+        autoreload=int(config.get("PythonAutoReload", 1))
+        if autoreload==0 and entry._value is not NOT_INITIALIZED:
+            # if we don't want to reload and we have a value,
+            # then we consider it fresh
+            return None
+        else:
+            return ModuleCache.check(self,req.filename,entry)
+
+    def build(self,req,opened,entry):
+        config = req.get_config()
+        log=int(config.get("PythonDebug", 0))
+        if log:
+            if entry._value is NOT_INITIALIZED:
+                req.log_error('Publisher loading page %s'%req.filename,apache.APLOG_NOTICE)
+            else:
+                req.log_error('Publisher reloading page %s'%req.filename,apache.APLOG_NOTICE)        
+        return ModuleCache.build(self,req,opened,entry)
+
+page_cache = PageCache()
+
 def handler(req):
 
     req.allow_methods(["GET", "POST", "HEAD"])
     if req.method not in ["GET", "POST", "HEAD"]:
         raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED
 
-    func_path = ""
-    if req.path_info:
-        func_path = req.path_info[1:] # skip first /
-        func_path = func_path.replace("/", ".")
-        if func_path[-1:] == ".":
-            func_path = func_path[:-1] 
-
-    # default to 'index' if no path_info was given
-    if not func_path:
-        func_path = "index"
-
-    # if any part of the path begins with "_", abort
-    # We need to make this test here, before resolve_object,
-    # to prevent the loading of modules whose name begins with
-    # an underscore.
-    if func_path[0] == '_' or func_path.count("._"):
-        req.log_error('Cannot access %s because '
-                      'it contains at least an underscore'
-                      % func_path, apache.APLOG_WARNING)
-        raise apache.SERVER_RETURN, apache.HTTP_FORBIDDEN
-
-    ## import the script
-    path, module_name =  os.path.split(req.filename)
-    if not module_name:
-        module_name = "index"
-
-    # get rid of the suffix
-    #   explanation: Suffixes that will get stripped off
-    #   are those that were specified as an argument to the
-    #   AddHandler directive. Everything else will be considered
-    #   a package.module rather than module.suffix
-    exts = req.get_addhandler_exts()
-    if not exts:
-        # this is SetHandler, make an exception for Python suffixes
-        exts = imp_suffixes
-    if req.extension:  # this exists if we're running in a | .ext handler
-        exts += req.extension[1:] 
-    if exts:
-        suffixes = exts.strip().split()
-        exp = "\\." + "$|\\.".join(suffixes)
-        suff_matcher = re.compile(exp) # python caches these, so its fast
-        module_name = suff_matcher.sub("", module_name)
-
-    # import module (or reload if needed)
-    # the [path] argument tells import_module not to allow modules whose
-    # full path is not in [path] or below.
-    config = req.get_config()
-    autoreload=int(config.get("PythonAutoReload", 1))
-    log=int(config.get("PythonDebug", 0))
-    try:
-        module = apache.import_module(module_name,
-                                      autoreload=autoreload,
-                                      log=log,
-                                      path=[path])
-    except ImportError:
-        et, ev, etb = sys.exc_info()
-        # try again, using default module, perhaps this is a
-        # /directory/function (as opposed to /directory/module/function)
-        func_path = module_name
-        module_name = "index"
-        try:
-            module = apache.import_module(module_name,
-                                          autoreload=autoreload,
-                                          log=log,
-                                          path=[path])
-        except ImportError:
-            # raise the original exception
-            raise et, ev, etb
+    # if the file exists, req.finfo is not None
+    if req.finfo:
+        
+        # The file exists, so we have a request of the form :
+        # /directory/[module][/func_path]
+        
+        # we check whether there is a file name or not
+        path, filename = os.path.split(req.filename)
+        if not filename:
+            
+            # if not, we look for index.py
+            req.filename = os.path.join(path,'index.py')
+
+        if not req.path_info or req.path_info=='/':
+
+            # we don't have a path info, or it's just a slash,
+            # so we'll call index
+            func_path = 'index'
+
+        else:
+
+            # we have a path_info, so we use it, removing the first slash
+            func_path = req.path_info[1:]
+    
+    else:
         
+        # The file does not exist, so it seems we are in the 
+        # case of a request in the form :
+        # /directory/func_path
+
+        # we'll just insert the module name index.py in the middle
+        path, func_path = os.path.split(req.filename)
+        req.filename = os.path.join(path,'index.py')
+
+        # I don't know if it's still possible to have a path_info
+        # but if we have one, we append it to the filename which
+        # is considered as a path_info.
+        if req.path_info:
+            func_path = func_path + req.path_info
+
+    # Now we turn slashes into dots
+    func_path = func_path.replace('/','.')    
+    
+    # We remove the last dot if any
+    if func_path[-1:] == ".":
+        func_path = func_path[:-1] 
+
+    # We use the page cache to load the module
+    module = page_cache[req]
+
     # does it have an __auth__?
     realm, user, passwd = process_auth(req, module)
 
@@ -139,9 +151,11 @@
         
         # process input, if any
         req.form = util.FieldStorage(req, keep_blank_values=1)
-        
         result = util.apply_fs_data(object, req.form, req=req)
 
+    # Now we'll send what the published object has returned
+    # TODO : I'm not sure we should always return apache.OK if something was sent
+    # or if there was an internal redirect.
     if result or req.bytes_sent > 0 or req.next:
         
         if result is None:
@@ -303,7 +317,6 @@
     parts = object_str.split('.')
         
     for i, obj_str in enumerate(parts):
-
         # path components starting with an underscore are forbidden
         if obj_str[0]=='_':
             req.log_error('Cannot traverse %s in %s because '