You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mod_python-commits@quetz.apache.org by nl...@apache.org on 2005/04/27 17:53:36 UTC
svn commit: r164998 - in /httpd/mod_python/trunk/lib/python/mod_python:
cache.py publisher.py
Author: nlehuen
Date: Wed Apr 27 08:53:36 2005
New Revision: 164998
URL: http://svn.apache.org/viewcvs?rev=164998&view=rev
Log:
Tentative fix for MODPYTHON-8 and MODPYTHON-9.
Added:
httpd/mod_python/trunk/lib/python/mod_python/cache.py (with props)
Modified:
httpd/mod_python/trunk/lib/python/mod_python/publisher.py
Added: httpd/mod_python/trunk/lib/python/mod_python/cache.py
URL: http://svn.apache.org/viewcvs/httpd/mod_python/trunk/lib/python/mod_python/cache.py?rev=164998&view=auto
==============================================================================
--- httpd/mod_python/trunk/lib/python/mod_python/cache.py (added)
+++ httpd/mod_python/trunk/lib/python/mod_python/cache.py Wed Apr 27 08:53:36 2005
@@ -0,0 +1,407 @@
+ #
+ # Copyright 2004 Apache Software Foundation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
+ # may not use this file except in compliance with the License. You
+ # may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ # implied. See the License for the specific language governing
+ # permissions and limitations under the License.
+ #
+ # Originally developed by Gregory Trubetskoy.
+ #
+ # This was donated by Nicolas Lehuen, and also posted to the Python Cookbook
+ # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302997
+ #
+ # $Id$
+
+# -*- coding: CP1252 -*-
+from threading import Lock
+from os import fstat
+from time import time,strptime
+from calendar import timegm
+import urllib2
+import re
+import weakref
+
+NOT_INITIALIZED = object()
+
+class Entry(object):
+ """ A cache entry, mostly an internal object. """
+ def __init__(self,key):
+ object.__init__(self)
+ self._key=key
+ self._value=NOT_INITIALIZED
+ self._lock=Lock()
+
+class Cache(object):
+ """ An abstract, multi-threaded cache object. """
+
+ def __init__(self,max_size=0):
+ """ Builds a cache with a limit of max_size entries.
+ If this limit is exceeded, the Least Recently Used entry is discarded.
+ if max_size==0, the cache is unbounded (no LRU rule is applied).
+ """
+ object.__init__(self)
+ self._maxsize=max_size
+ self._dict={}
+ self._lock=Lock()
+
+ # Header of the access list
+ if self._maxsize:
+ self._head=Entry(None)
+ self._head._previous=self._head
+ self._head._next=self._head
+
+ def __setitem__(self,name,value):
+ """ Populates the cache with a given name and value. """
+ self._lock.acquire()
+ try:
+ key = self.key(name)
+ entry = self._dict.get(key)
+ if not entry:
+ entry = Entry(key)
+ self._pack(entry,value)
+ self._dict[key]=entry
+ if self._maxsize:
+ entry._next = entry._previous = None
+ self._access(entry)
+ self._checklru()
+ else:
+ self._pack(entry,value)
+ if self._maxsize:
+ self._access(entry)
+ self.commit()
+ finally:
+ self._lock.release()
+
+ def __getitem__(self,name):
+ """ Gets a value from the cache, builds it if required. """
+ self._lock.acquire()
+ try:
+ key = self.key(name)
+ entry = self._dict.get(key)
+ if not entry:
+ entry = Entry(key)
+ self._dict[key]=entry
+ if self._maxsize:
+ entry._next = entry._previous = None
+ self._access(entry)
+ self._checklru()
+ elif self._maxsize:
+ self._access(entry)
+ finally:
+ self._lock.release()
+
+ entry._lock.acquire()
+ try:
+ value = self._unpack(entry)
+ if value is NOT_INITIALIZED:
+ opened = self.check(name,entry)
+ value = self.build(name,opened,entry)
+ self._pack(entry,value)
+ self.commit()
+ else:
+ opened = self.check(name,entry)
+ if opened is not None:
+ value = self.build(name,opened,entry)
+ self._pack(entry,value)
+ self.commit()
+ return value
+ finally:
+ entry._lock.release()
+
+ def __delitem__(self,key):
+ self._lock.acquire()
+ try:
+ key = self.key(key)
+ del self._dict[key]
+ finally:
+ self._lock.release()
+
+ def mru(self):
+ """ Returns the Most Recently Used key """
+ if self._maxsize:
+ self._lock.acquire()
+ try:
+ return self._head._previous._key
+ finally:
+ self._lock.release()
+ else:
+ return None
+
+ def lru(self):
+ """ Returns the Least Recently Used key """
+ if self._maxsize:
+ self._lock.acquire()
+ try:
+ return self._head._next._key
+ finally:
+ self._lock.release()
+ else:
+ return None
+
+ def key(self,name):
+ """ Override this method to extract a key from the name passed to the [] operator """
+ return name
+
+ def commit(self):
+ """ Override this method if you want to do something each time the underlying dictionary is modified (e.g. make it persistent). """
+ pass
+
+ def clear(self):
+ """ Clears the cache """
+ self._lock.acquire()
+ try:
+ self._dict.clear()
+ if self._maxsize:
+ self._head._next=self._head
+ self._head._previous=self._head
+ finally:
+ self._lock.release()
+
+ def check(self,name,entry):
+ """ Override this method to check whether the entry with the given name is stale. Return None if it is fresh
+ or an opened resource if it is stale. The object returned will be passed to the 'build' method as the 'opened' parameter.
+ Use the 'entry' parameter to store meta-data if required. Don't worry about multiple threads accessing the same name,
+ as this method is properly isolated.
+ """
+ return None
+
+ def build(self,name,opened,entry):
+ """ Build the cached value with the given name from the given opened resource. Use entry to obtain or store meta-data if needed.
+ Don't worry about multiple threads accessing the same name, as this method is properly isolated.
+ """
+ raise NotImplementedError()
+
+ def _access(self,entry):
+ " Internal use only, must be invoked within a cache lock. Updates the access list. """
+ if entry._next is not self._head:
+ if entry._previous is not None:
+ # remove the entry from the access list
+ entry._previous._next=entry._next
+ entry._next._previous=entry._previous
+ # insert the entry at the end of the access list
+ entry._previous=self._head._previous
+ entry._previous._next=entry
+ entry._next=self._head
+ entry._next._previous=entry
+ if self._head._next is self._head:
+ self._head._next=entry
+
+ def _checklru(self):
+ " Internal use only, must be invoked within a cache lock. Removes the LRU entry if needed. """
+ if len(self._dict)>self._maxsize:
+ lru=self._head._next
+ lru._previous._next=lru._next
+ lru._next._previous=lru._previous
+ del self._dict[lru._key]
+
+ def _pack(self,entry,value):
+ """ Store the value in the entry. """
+ entry._value=value
+
+ def _unpack(self,entry):
+ """ Recover the value from the entry, returns NOT_INITIALIZED if it is not OK. """
+ return entry._value
+
+class WeakCache(Cache):
+ """ This cache holds weak references to the values it stores. Whenever a value is not longer
+ normally referenced, it is removed from the cache. Useful for sharing the result of long
+ computations but letting them go as soon as they are not needed by anybody.
+ """
+
+ def _pack(self,entry,value):
+ entry._value=weakref.ref(value,lambda ref: self.__delitem__(entry._key))
+
+ def _unpack(self,entry):
+ if entry._value is NOT_INITIALIZED:
+ return NOT_INITIALIZED
+
+ value = entry._value()
+ if value is None:
+ return NOT_INITIALIZED
+ else:
+ return value
+
+class FileCache(Cache):
+ """ A file cache. Returns the content of the files as a string, given their filename.
+ Whenever the files are modified (according to their modification time) the cache is updated.
+ Override the build method to obtain more interesting behaviour.
+ """
+ def __init__(self,max_size=0,mode='rb'):
+ Cache.__init__(self,max_size)
+ self.mode=mode
+
+ def check(self,name,entry):
+ """ Checks the modification time to determine whether a file has changed or not. """
+ f = file(name,self.mode)
+ fs = fstat(f.fileno())
+ ts1 = fs[-2]
+ try:
+ ts2 = entry._timestamp
+ except AttributeError:
+ ts2 = ts1-1
+
+ if ts2<ts1:
+ entry._timestamp=ts1
+ return f
+ else:
+ return None
+
+ def build(self,name,opened,entry):
+ """ Return the content of the file as a string. Override this for better behaviour. """
+ try:
+ return opened.read()
+ finally:
+ opened.close()
+
+def parseRFC822Time(t):
+ """ Parses a date such as 'Sun, 06 Nov 1994 08:49:37 GMT' and returns the
+ corresponding number of seconds since the epoch, as computed by calendar.timegm.
+ strptime raises ValueError if the string does not follow this format.
+ """
+ return timegm(strptime(t,"%a, %d %b %Y %H:%M:%S %Z"))
+
+# Captures the number of seconds of a 'max-age = <digits>' directive, ignoring case.
+re_max_age=re.compile('max-age\s*=\s*(\d+)',re.I)
+
+class HTTPEntity(object):
+ """ Value object pairing an entity with its metadata.
+ HTTPCache.build stores the response body as 'entity' and the
+ response headers, converted to a dict, as 'metadata'.
+ """
+ def __init__(self,entity,metadata):
+ self.entity=entity
+ self.metadata=metadata
+
+ def __repr__(self):
+ """ Shows the repr of the entity followed by the metadata. """
+ return 'HTTPEntity(%s,%s)'%(repr(self.entity),self.metadata)
+
+ def __str__(self):
+ """ Returns the entity itself. """
+ return self.entity
+
+class HTTPCache(Cache):
+ """ An HTTP cache. Returns the entity found at the given URL.
+ Uses Expires, ETag and Last-Modified headers to minimize bandwidth usage.
+ Partial Cache-Control support (only max-age is supported).
+ """
+ def check(self,name,entry):
+ request = urllib2.Request(name)
+
+ try:
+ if time()<entry._expires:
+ return None
+ except AttributeError:
+ pass
+ try:
+ header, value = entry._validator
+ request.headers[header]=value
+ except AttributeError:
+ pass
+ opened = None
+ try:
+ opened = urllib2.urlopen(request)
+ headers = opened.info()
+
+ # expiration handling
+ expiration = False
+ try:
+ match = re_max_age.match(headers['cache-control'])
+ if match:
+ entry._expires=time()+int(match.group(1))
+ expiration = True
+ except (KeyError,ValueError):
+ pass
+ if not expiration:
+ try:
+ date = parseRFC822Time(headers['date'])
+ expires = parseRFC822Time(headers['expires'])
+ entry._expires = time()+(expires-date)
+ expiration = True
+ except KeyError:
+ pass
+
+ # validator handling
+ validation = False
+ try:
+ entry._validator='If-None-Match',headers['etag']
+ validation = True
+ except KeyError:
+ pass
+ if not validation:
+ try:
+ entry._validator='If-Modified-Since',headers['last-modified']
+ except KeyError:
+ pass
+
+ return opened
+ except urllib2.HTTPError, error:
+ if opened: opened.close()
+ if error.code==304:
+ return None
+ else:
+ raise error
+
+ def build(self,name,opened,entry):
+ try:
+ return HTTPEntity(opened.read(),dict(opened.info()))
+ finally:
+ opened.close()
+
+class Module(object):
+ """ Placeholder object for the module definition. """
+ def __init__(self,filename):
+ # mimics the __file__ attribute of a real module
+ self.__file__=filename
+
+ def __repr__(self):
+ """ Shows the type name, the id of the object and the file it comes from. """
+ return '<%s object at 0x%08x from %s>'%(type(self).__name__,id(self),self.__file__)
+
+class ModuleCache(FileCache):
+ """ A module cache. Give it a file name, it returns a module-like object
+ which results from the execution of the Python script it contains.
+ """
+ def __init__(self,max_size=0):
+ FileCache.__init__(self,max_size,'r')
+
+ def build(self,name,opened,entry):
+ try:
+ module = Module(name)
+ exec opened in module.__dict__
+ return module
+ # I used to use imp.load_source but right now I'm trying the stuff above
+ # return imp.load_source(re.sub('\W','_',name),name,opened)
+ finally:
+ opened.close()
+
+class HttpModuleCache(HTTPCache):
+ """ A module cache. Give it a file name, it returns a module-like object
+ which results from the execution of the Python script it contains.
+ """
+ def __init__(self,max_size=0):
+ HTTPCache.__init__(self,max_size)
+
+ def build(self,name,opened,entry):
+ try:
+ module = Module(name)
+ text = opened.read().replace('\r\n','\n')
+ code = compile(text,name,'exec')
+ exec code in module.__dict__
+ return module
+ # I used to use imp.load_source but right now I'm trying the stuff above
+ # return imp.load_source(re.sub('\W','_',name),name,opened)
+ finally:
+ opened.close()
+
+class FunctionCache(Cache):
+ def __init__(self,function,max_size=0):
+ Cache.__init__(self,max_size)
+ self.function=function
+
+ def __call__(self,*args,**kw):
+ if kw:
+ # a dict is not hashable so we build a tuple of (key,value) pairs
+ kw = tuple(kw.iteritems())
+ return self[args,kw]
+ else:
+ return self[args,()]
+
+ def build(self,name,opened,entry):
+ args,kw = name
+ return self.function(*args,**dict(kw))
Propchange: httpd/mod_python/trunk/lib/python/mod_python/cache.py
------------------------------------------------------------------------------
svn:keywords = Id
Modified: httpd/mod_python/trunk/lib/python/mod_python/publisher.py
URL: http://svn.apache.org/viewcvs/httpd/mod_python/trunk/lib/python/mod_python/publisher.py?rev=164998&r1=164997&r2=164998&view=diff
==============================================================================
--- httpd/mod_python/trunk/lib/python/mod_python/publisher.py (original)
+++ httpd/mod_python/trunk/lib/python/mod_python/publisher.py Wed Apr 27 08:53:36 2005
@@ -43,81 +43,93 @@
imp_suffixes = " ".join([x[0][1:] for x in imp.get_suffixes()])
+from cache import ModuleCache, NOT_INITIALIZED
+
+class PageCache(ModuleCache):
+ """ This is the cache for page objects. Handles the automatic reloading of pages. """
+
+ def key(self,req):
+ """ Extracts the filename from the request """
+ return req.filename
+
+ def check(self,req,entry):
+ config = req.get_config()
+ autoreload=int(config.get("PythonAutoReload", 1))
+ if autoreload==0 and entry._value is not NOT_INITIALIZED:
+ # if we don't want to reload and we have a value,
+ # then we consider it fresh
+ return None
+ else:
+ return ModuleCache.check(self,req.filename,entry)
+
+ def build(self,req,opened,entry):
+ config = req.get_config()
+ log=int(config.get("PythonDebug", 0))
+ if log:
+ if entry._value is NOT_INITIALIZED:
+ req.log_error('Publisher loading page %s'%req.filename,apache.APLOG_NOTICE)
+ else:
+ req.log_error('Publisher reloading page %s'%req.filename,apache.APLOG_NOTICE)
+ return ModuleCache.build(self,req,opened,entry)
+
+page_cache = PageCache()
+
def handler(req):
req.allow_methods(["GET", "POST", "HEAD"])
if req.method not in ["GET", "POST", "HEAD"]:
raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED
- func_path = ""
- if req.path_info:
- func_path = req.path_info[1:] # skip first /
- func_path = func_path.replace("/", ".")
- if func_path[-1:] == ".":
- func_path = func_path[:-1]
-
- # default to 'index' if no path_info was given
- if not func_path:
- func_path = "index"
-
- # if any part of the path begins with "_", abort
- # We need to make this test here, before resolve_object,
- # to prevent the loading of modules whose name begins with
- # an underscore.
- if func_path[0] == '_' or func_path.count("._"):
- req.log_error('Cannot access %s because '
- 'it contains at least an underscore'
- % func_path, apache.APLOG_WARNING)
- raise apache.SERVER_RETURN, apache.HTTP_FORBIDDEN
-
- ## import the script
- path, module_name = os.path.split(req.filename)
- if not module_name:
- module_name = "index"
-
- # get rid of the suffix
- # explanation: Suffixes that will get stripped off
- # are those that were specified as an argument to the
- # AddHandler directive. Everything else will be considered
- # a package.module rather than module.suffix
- exts = req.get_addhandler_exts()
- if not exts:
- # this is SetHandler, make an exception for Python suffixes
- exts = imp_suffixes
- if req.extension: # this exists if we're running in a | .ext handler
- exts += req.extension[1:]
- if exts:
- suffixes = exts.strip().split()
- exp = "\\." + "$|\\.".join(suffixes)
- suff_matcher = re.compile(exp) # python caches these, so its fast
- module_name = suff_matcher.sub("", module_name)
-
- # import module (or reload if needed)
- # the [path] argument tells import_module not to allow modules whose
- # full path is not in [path] or below.
- config = req.get_config()
- autoreload=int(config.get("PythonAutoReload", 1))
- log=int(config.get("PythonDebug", 0))
- try:
- module = apache.import_module(module_name,
- autoreload=autoreload,
- log=log,
- path=[path])
- except ImportError:
- et, ev, etb = sys.exc_info()
- # try again, using default module, perhaps this is a
- # /directory/function (as opposed to /directory/module/function)
- func_path = module_name
- module_name = "index"
- try:
- module = apache.import_module(module_name,
- autoreload=autoreload,
- log=log,
- path=[path])
- except ImportError:
- # raise the original exception
- raise et, ev, etb
+ # if the file exists, req.finfo is not None
+ if req.finfo:
+
+ # The file exists, so we have a request of the form :
+ # /directory/[module][/func_path]
+
+ # we check whether there is a file name or not
+ path, filename = os.path.split(req.filename)
+ if not filename:
+
+ # if not, we look for index.py
+ req.filename = os.path.join(path,'index.py')
+
+ if not req.path_info or req.path_info=='/':
+
+ # we don't have a path info, or it's just a slash,
+ # so we'll call index
+ func_path = 'index'
+
+ else:
+
+ # we have a path_info, so we use it, removing the first slash
+ func_path = req.path_info[1:]
+
+ else:
+ # The file does not exist, so it seems we are in the
+ # case of a request in the form :
+ # /directory/func_path
+
+ # we'll just insert the module name index.py in the middle
+ path, func_path = os.path.split(req.filename)
+ req.filename = os.path.join(path,'index.py')
+
+ # It is unclear whether a path_info can still be present in this
+ # case, but if there is one we append it to the last component of
+ # the filename, which is itself being treated as the path_info.
+ if req.path_info:
+ func_path = func_path + req.path_info
+
+ # Now we turn slashes into dots
+ func_path = func_path.replace('/','.')
+
+ # We remove the last dot if any
+ if func_path[-1:] == ".":
+ func_path = func_path[:-1]
+
+ # We use the page cache to load the module
+ module = page_cache[req]
+
# does it have an __auth__?
realm, user, passwd = process_auth(req, module)
@@ -139,9 +151,11 @@
# process input, if any
req.form = util.FieldStorage(req, keep_blank_values=1)
-
result = util.apply_fs_data(object, req.form, req=req)
+ # Now we'll send what the published object has returned
+ # TODO : I'm not sure we should always return apache.OK if something was sent
+ # or if there was an internal redirect.
if result or req.bytes_sent > 0 or req.next:
if result is None:
@@ -303,7 +317,6 @@
parts = object_str.split('.')
for i, obj_str in enumerate(parts):
-
# path components starting with an underscore are forbidden
if obj_str[0]=='_':
req.log_error('Cannot traverse %s in %s because '