You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2012/03/16 23:42:02 UTC
svn commit: r1301806 - in /subversion/trunk/notes/directory-index:
dirindex.py logimport.py schema.sql
Author: brane
Date: Fri Mar 16 22:42:01 2012
New Revision: 1301806
URL: http://svn.apache.org/viewvc?rev=1301806&view=rev
Log:
The Great Refactoring.
* notes/directory-index/schema.sql:
Rename "pathindex" to the more generic "strindex".
Record tree-change operations in the directory index.
Define all interesting queries here.
* notes/directory-index/dirindex.py:
Parse schema.sql to load schema and queries.
Use a temporary (in-memory) table to build the tree transaction;
this is orders of magnitude faster than using Python dicts and lists,
and also quite a bit kinder to the memory manager.
* notes/directory-index/logimport.py:
Better logging configuration and error handling.
Use proper option parsing.
Modified:
subversion/trunk/notes/directory-index/dirindex.py
subversion/trunk/notes/directory-index/logimport.py
subversion/trunk/notes/directory-index/schema.sql
Modified: subversion/trunk/notes/directory-index/dirindex.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/dirindex.py?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/dirindex.py (original)
+++ subversion/trunk/notes/directory-index/dirindex.py Fri Mar 16 22:42:01 2012
@@ -23,10 +23,64 @@ import sqlite3
class Error(Exception):
- pass
+ def __init__(self, msg, *args, **kwargs):
+ opcode = kwargs.pop("action", None)
+ if opcode is not None:
+ msg = Dirent._opname(opcode) + msg
+ super(Error, self).__init__(msg, *args, **kwargs)
+
+
+class SQL(object):
+ """Named index of SQL schema definitions and statements.
+
+ Parses "schema.sql" and creates a class-level attribute for each
+ script and statement in that file.
+ """
+
+ @classmethod
+ def _load_statements(cls):
+ import cStringIO
+ import pkgutil
+ import re
+
+ comment_rx = re.compile(r"\s*--.*$")
+ header_rx = re.compile(r"^---(STATEMENT|SCRIPT)"
+ r"\s+(?P<name>[_A-Z]+)$")
+
+ name = None
+ content = None
+
+ def record_current_statement():
+ if name is not None:
+ setattr(cls, name, content.getvalue())
+
+ schema = cStringIO.StringIO(pkgutil.get_data(__name__, "schema.sql"))
+ for line in schema:
+ line = line.rstrip()
+ if not line:
+ continue
+
+ header = header_rx.match(line)
+ if header:
+ record_current_statement()
+ name = header.group("name")
+ content = cStringIO.StringIO()
+ continue
+
+ line = comment_rx.sub("", line)
+ if not line:
+ continue
+
+ if content is not None:
+ content.write(line)
+ content.write("\n")
+ record_current_statement()
+SQL._load_statements()
class SQLobject(object):
+ """Base for ORM abstractions."""
+
__slots__ = ()
def __init__(self, **kwargs):
for name, val in kwargs.items():
@@ -68,91 +122,105 @@ class SQLobject(object):
class Revent(SQLobject):
+ """O/R mapping for the "revision" table."""
+
__slots__ = ("version", "created", "author", "log")
def _put(self, cursor):
if self.created is None:
now = datetime.datetime.utcnow()
self.created = now.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
- self._execute(cursor,
- "INSERT INTO revision (version, created, author, log)"
- " VALUES (?, ?, ?, ?)",
+ self._execute(cursor, SQL.INSERT_REVISION_RECORD,
[self.version, self.created, self.author, self.log])
@classmethod
def _get(cls, cursor, pkey):
- cursor.execute("SELECT * FROM revision WHERE version = ?", [pkey])
+ cursor.execute(SQL.GET_REVENT_BY_VERSION, [pkey])
return cls._from_row(cursor.fetchone())
-class Pathent(SQLobject):
- __slots__ = ("pathid", "abspath")
+class Strent(SQLobject):
+ """O/R mapping for the "strindex" table."""
+
+ __slots__ = ("strid", "content")
def _put(self, cursor):
- self._execute(cursor,
- "INSERT INTO pathindex (abspath) VALUES (?)",
- [self.abspath])
- self.pathid = cursor.lastrowid
+ self._execute(cursor, SQL.INSERT_STRINDEX_RECORD, [self.content])
+ self.strid = cursor.lastrowid
@classmethod
def _get(cls, cursor, pkey):
- cls._execute(cursor,
- "SELECT * FROM pathindex WHERE pathid = ?",
- [pkey])
+ cls._execute(cursor, SQL.GET_STRENT_BY_STRID, [pkey])
return cls._from_row(cursor.fetchone())
@classmethod
- def _find(cls, cursor, abspath):
- cls._execute(cursor,
- "SELECT * FROM pathindex WHERE abspath = ?",
- [abspath])
+ def _find(cls, cursor, content):
+ cls._execute(cursor, SQL.GET_STRENT_BY_CONTENT, [content])
return cls._from_row(cursor.fetchone())
class Dirent(SQLobject):
- __slots__ = ("rowid", "pathid", "version", "deleted",
- "kind", "origin", "copied", "subtree",
+ """O/R mapping for a virtual non-materialized view representing
+ a natural join of the "dirindex" and "strindex" tables."""
+
+ __slots__ = ("rowid", "origin", "pathid", "version",
+ "kind", "opcode", "subtree",
"abspath")
+ # Kinds
+ DIR = "D"
+ FILE = "F"
+
+ # Opcodes
+ ADD = "A"
+ REPLACE = "R"
+ MODIFY = "M"
+ DELETE = "D"
+ RENAME = "N"
+
+ # Opcode names
+ __opnames = {ADD: "add",
+ REPLACE: "replace",
+ MODIFY: "modify",
+ DELETE: "delete",
+ RENAME: "rename"}
+
+ @classmethod
+ def _opname(cls, opcode):
+ return cls.__opnames.get(opcode)
+
+ @property
+ def _deleted(self):
+ return (self.opcode == self.DELETE)
+
def __str__(self):
- return "%3d %c%c %s" % (
+ return "%d %c%c%c %c %s" % (
self.version,
- self.deleted and "x" or " ",
- self.kind and "f" or "d",
- self.abspath)
+ self.subtree and "(" or " ",
+ self.opcode,
+ self.subtree and ")" or " ",
+ self.kind, self.abspath)
def _put(self, cursor):
- pathent = Pathent._find(cursor, self.abspath)
- if pathent is None:
- pathent = Pathent(abspath = self.abspath)
- pathent._put(cursor)
- self._execute(cursor,
- "INSERT INTO dirindex"
- " (pathid, version, deleted,"
- " kind, origin, copied, subtree)"
- " VALUES (?, ?, ?, ?, ?, ?, ?)",
- [pathent.pathid, self.version, self.deleted,
- self.kind, self.origin, self.copied, self.subtree])
+ strent = Strent._find(cursor, self.abspath)
+ if strent is None:
+ strent = Strent(content = self.abspath)
+ strent._put(cursor)
+ self._execute(cursor, SQL.INSERT_DIRINDEX_RECORD,
+ [self.origin, strent.strid, self.version,
+ self.kind, self.opcode,self.subtree])
self.rowid = cursor.lastrowid
- self.pathid = pathent.pathid
+ self.pathid = strent.strid
@classmethod
def _get(cls, cursor, pkey):
- cls._execute(cursor,
- "SELECT dirindex.*, pathindex.abspath"
- " FROM dirindex JOIN pathindex"
- " ON dirindex.pathid = pathindex.pathid"
- " WHERE dirindex.rowid = ?", [pkey])
+ cls._execute(cursor, SQL.GET_DIRENT_BY_ROWID, [pkey])
return cls._from_row(cursor.fetchone())
@classmethod
def _find(cls, cursor, abspath, version):
cls._execute(cursor,
- "SELECT dirindex.*, pathindex.abspath"
- " FROM dirindex JOIN pathindex"
- " ON dirindex.pathid = pathindex.pathid"
- " WHERE pathindex.abspath = ?"
- " AND dirindex.version = ?",
+ SQL.GET_DIRENT_BY_ABSPATH_AND_VERSION,
[abspath, version])
return cls._from_row(cursor.fetchone())
@@ -162,53 +230,27 @@ class Index(object):
self.conn = sqlite3.connect(database, isolation_level = "IMMEDIATE")
self.conn.row_factory = sqlite3.Row
self.cursor = self.conn.cursor()
+ self.cursor.execute("PRAGMA page_size = 4096")
+ self.cursor.execute("PRAGMA temp_store = MEMORY")
self.cursor.execute("PRAGMA foreign_keys = ON")
self.cursor.execute("PRAGMA case_sensitive_like = ON")
self.cursor.execute("PRAGMA encoding = 'UTF-8'")
- __schema = """
-DROP TABLE IF EXISTS dirindex;
-DROP TABLE IF EXISTS pathindex;
-DROP TABLE IF EXISTS revision;
-
-CREATE TABLE revision (
- version integer NOT NULL PRIMARY KEY,
- created timestamp NOT NULL,
- author varchar NULL,
- log varchar NULL
-);
-
-CREATE TABLE pathindex (
- pathid integer NOT NULL PRIMARY KEY,
- abspath varchar NOT NULL UNIQUE
-);
-
-CREATE TABLE dirindex (
- rowid integer NOT NULL PRIMARY KEY,
- pathid integer NOT NULL REFERENCES pathindex(pathid),
- version integer NOT NULL REFERENCES revision(version),
- deleted boolean NOT NULL,
- kind integer NOT NULL,
- origin integer NULL REFERENCES dirindex(rowid),
- copied boolean NOT NULL,
- subtree boolean NOT NULL
-);
-CREATE UNIQUE INDEX dirindex_versioned_tree ON dirindex(pathid, version DESC);
-CREATE INDEX dirindex_successor_list ON dirindex(origin);
-CREATE INDEX dirindex_deleted ON dirindex(deleted);
-
-INSERT INTO revision (version, created, author, log)
- VALUES (0, 'EPOCH', NULL, NULL);
-INSERT INTO pathindex (pathid, abspath) VALUES (0, '/');
-INSERT INTO dirindex (rowid, pathid, version, deleted,
- kind, origin, copied, subtree)
- VALUES (0, 0, 0, 0, 0, NULL, 0, 0);
-"""
+ @staticmethod
+ def normpath(abspath):
+ return abspath.rstrip("/")
+
+ @staticmethod
+ def subtree_pattern(abspath):
+ return (abspath.rstrip("/")
+ .replace("#", "##")
+ .replace("%", "#%")
+ .replace("_", "#_")) + "/%"
def initialize(self):
try:
- SQLobject._log("%s", self.__schema)
- self.cursor.executescript(self.__schema)
+ SQLobject._log("%s", SQL.CREATE_SCHEMA)
+ self.cursor.executescript(SQL.CREATE_SCHEMA)
self.commit()
finally:
self.rollback()
@@ -245,34 +287,20 @@ INSERT INTO dirindex (rowid, pathid, ver
def lookup(self, abspath, version):
SQLobject._execute(
self.cursor,
- "SELECT dirindex.*, pathindex.abspath FROM dirindex"
- " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
- " WHERE pathindex.abspath = ? AND dirindex.version <= ?"
- " ORDER BY pathindex.abspath ASC, dirindex.version DESC"
- " LIMIT 1",
+ SQL.LOOKUP_ABSPATH_AT_REVISION,
[abspath, version])
row = self.cursor.fetchone()
- if row is not None and not row["deleted"]:
- return Dirent._from_row(row)
+ if row is not None:
+ dirent = Dirent._from_row(row)
+ if not dirent._deleted:
+ return dirent
return None
def subtree(self, abspath, version):
- pattern = (abspath.rstrip("/")
- .replace("#", "##")
- .replace("%", "#%")
- .replace("_", "#_")) + "/%"
SQLobject._execute(
self.cursor,
- "SELECT dirindex.*, pathindex.abspath FROM dirindex"
- " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
- " JOIN (SELECT pathid, MAX(version) AS maxver FROM dirindex"
- " WHERE version <= ? GROUP BY pathid) AS filtered"
- " ON dirindex.pathid == filtered.pathid"
- " AND dirindex.version == filtered.maxver"
- " WHERE pathindex.abspath LIKE ? ESCAPE '#'"
- " AND NOT dirindex.deleted"
- " ORDER BY pathindex.abspath ASC",
- [version, pattern])
+ SQL.LIST_SUBTREE_AT_REVISION,
+ [version, self.subtree_pattern(abspath)])
for row in self.cursor:
yield Dirent._from_row(row)
@@ -286,10 +314,7 @@ INSERT INTO dirindex (rowid, pathid, ver
assert isinstance(dirent, Dirent)
SQLobject._execute(
self.cursor,
- "SELECT dirindex.*, pathindex.abspath FROM dirindex"
- " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
- " WHERE dirindex.origin = ?"
- " ORDER BY pathindex.abspath ASC, dirindex.version ASC",
+ SQL.LIST_DIRENT_SUCCESSORS,
[dirent.rowid])
for row in self.cursor:
yield Dirent._from_row(row)
@@ -304,160 +329,222 @@ class Revision(object):
self.__created = created
self.__author = author
self.__log = log
- self.__context = None
+ self.__txn = None
+ self.__deferred = None
index.rollback()
+ class __Context(object):
+ def __init__(self, version, connection):
+ self.version = version
+ self.conn = connection
+ self.cursor = connection.cursor()
+ SQLobject._execute(self.cursor, SQL.CREATE_TRANSACTION_CONTEXT)
+
+ def clear(self):
+ SQLobject._execute(self.cursor, SQL.REMOVE_TRANSACTION_CONTEXT)
+
+ def __iter__(self):
+ SQLobject._execute(self.cursor, SQL.LIST_TRANSACTION_RECORDS)
+ for row in self.cursor:
+ dirent = Dirent._from_row(row)
+ dirent.version = self.version
+ yield dirent
+
+ def lookup(self, abspath):
+ SQLobject._execute(self.cursor,
+ SQL.GET_TRANSACTION_RECORD,
+ [abspath])
+ row = self.cursor.fetchone()
+ if row is not None:
+ dirent = Dirent._from_row(row)
+ dirent.version = self.version
+ return dirent
+ return None
+
+ def remove(self, abspath, purge=False):
+ target = self.lookup(abspath)
+ if not target:
+ raise Error("txn context: remove nonexistent " + abspath)
+ logging.debug("txn context: remove %s", abspath)
+ SQLobject._execute(self.cursor,
+ SQL.REMOVE_TRANSACTION_RECORD,
+ [abspath])
+ if purge:
+ logging.debug("txn context: purge %s/*", abspath)
+ SQLobject._execute(self.cursor,
+ SQL.REMOVE_TRANSACTION_SUBTREE,
+ [Index.subtree_pattern(abspath)])
+
+ def record(self, dirent, replace=False, purge=False):
+ target = self.lookup(dirent.abspath)
+ if target is not None:
+ if not replace:
+ raise Error("txn context: record existing "
+ + dirent.abspath)
+ elif not target.subtree:
+ raise Error("txn context: replace conflict "
+ + dirent.abspath)
+ self.remove(target.abspath, purge and target.kind == Dirent.DIR)
+ SQLobject._execute(self.cursor,
+ SQL.INSERT_TRANSACTION_RECORD,
+ [dirent.origin, dirent.abspath,
+ dirent.kind, dirent.opcode, dirent.subtree])
+
def __enter__(self):
if self.revent is not None:
raise Error("revision is read-only")
- SQLobject._log("BEGIN")
+ self.__context = self.__Context(self.version, self.index.conn)
+ SQLobject._execute(self.index.cursor, "BEGIN")
self.revent = self.index.new_revision(
self.version, self.__created, self.__author, self.__log)
- self.__context = {}
return self
def __exit__(self, exc_type, exc_value, traceback):
try:
- if exc_type is None and len(self.__context):
- for dirent in sorted(self.__context.itervalues()):
+ if exc_type is None:
+ for dirent in self.__context:
self.index.insert(dirent)
+ logging.debug("insert: %s", dirent)
self.index.commit()
+ else:
+ self.index.rollback()
except:
self.index.rollback()
raise
finally:
+ self.__context.clear()
self.__context = None
- def __record(self, dirent, action):
- self.__context[dirent.abspath] = dirent
- if dirent.subtree:
- action = "(%s)" % action
- else:
- action = " %s " % action
- logging.debug(" %-9s %s", action, dirent)
+ def __record(self, dirent, replace=False, purge=False):
+ self.__context.record(dirent, replace, purge)
+ logging.debug("record: %s", dirent)
- def __check_writable(self, action):
+ def __check_writable(self, opcode):
if self.__context is None:
- raise Error(action + " requires a transaction")
+ raise Error(" requires a transaction", action=opcode)
- def __check_not_root(self, abspath, action):
+ def __check_not_root(self, abspath, opcode):
if abspath.rstrip("/") == "":
- raise Error(action + " not allowed on /")
+ raise Error(" not allowed on /", action=opcode)
- def __find_target(self, abspath, action):
- target = self.__context.get(abspath)
- if target is not None and not target.subtree:
- raise Error(action + " overrides explicit " + abspath)
- if target is None:
- target = self.index.lookup(abspath, self.version - 1)
+ def __find_target(self, abspath, opcode):
+ target = self.__context.lookup(abspath)
+ if target is not None:
+ if not target.subtree:
+ raise Error(" overrides explicit " + abspath, action=opcode)
+ return target, target.origin
+ target = self.index.lookup(abspath, self.version - 1)
if target is None:
- raise Error(action + " target does not exist: " + abspath)
- return target
+ raise Error(" target does not exist: " + abspath, action=opcode)
+ return target, target.rowid
def lookup(self, abspath):
try:
- return self.index.lookup(abspath, self.version)
+ return self.index.lookup(self.index.normpath(abspath),
+ self.version)
finally:
if self.__context is None:
self.index.rollback()
- def __add(self, action, abspath, kind, frompath, fromver):
+ def __add(self, opcode, abspath, kind, frompath, fromver):
origin = None
if frompath is not None:
+ frompath = self.index.normpath(frompath)
+ fromver = int(fromver)
origin = self.index.lookup(frompath, fromver)
if origin is None:
- raise Error(action + " source does not exist: " + frompath)
+ raise Error(" source does not exist: " + frompath, action=opcode)
if origin.kind != kind:
- raise Error(action + " changes the source object kind")
+ raise Error(" changes the source object kind", action=opcode)
origin = origin.rowid
- dirent = Dirent(abspath = abspath,
+ dirent = Dirent(origin = origin,
+ abspath = abspath,
version = self.version,
- deleted = 0,
kind = kind,
- origin = origin,
- copied = int(origin is not None),
+ opcode = opcode,
subtree = 0)
- self.__record(dirent, action)
- if frompath is not None:
- offset = len(frompath.rstrip("/"))
- prefix = abspath.rstrip("/")
- for source in self.index.subtree(frompath, fromver):
- dirent = Dirent(rowid = source.rowid,
- abspath = prefix + source.abspath[offset:],
- version = self.version,
- deleted = 0,
- kind = source.kind,
- origin = source.rowid,
- copied = 1,
- subtree = 1)
- self.__record(dirent, action)
+ self.__record(dirent,
+ replace=(opcode == Dirent.REPLACE),
+ purge=(opcode == Dirent.REPLACE))
+ if frompath is not None and dirent.kind == Dirent.DIR:
+ prefix = dirent.abspath
+ offset = len(frompath)
+ for source in list(self.index.subtree(frompath, fromver)):
+ abspath = prefix + source.abspath[offset:]
+ self.__record(Dirent(origin = source.rowid,
+ abspath = abspath,
+ version = self.version,
+ kind = source.kind,
+ opcode = opcode,
+ subtree = 1))
def add(self, abspath, kind, frompath=None, fromver=None):
- action = "add"
- self.__check_writable(action)
- self.__check_not_root(abspath, action)
- return self.__add(action, abspath, kind, frompath, fromver)
+ opcode = Dirent.ADD
+ abspath = self.index.normpath(abspath)
+ self.__check_writable(opcode)
+ self.__check_not_root(abspath, opcode)
+ return self.__add(opcode, abspath, kind, frompath, fromver)
def replace(self, abspath, kind, frompath=None, fromver=None):
- action = "replace"
- self.__check_writable(action)
- self.__check_not_root(abspath, action)
- self.__find_target(abspath, action)
- return self.__add(action, abspath, kind, frompath, fromver)
+ opcode = Dirent.REPLACE
+ abspath = self.index.normpath(abspath)
+ self.__check_writable(opcode)
+ self.__check_not_root(abspath, opcode)
+ self.__find_target(abspath, opcode)
+ return self.__add(opcode, abspath, kind, frompath, fromver)
def modify(self, abspath):
- action = "modify"
- self.__check_writable(action)
- target = self.__find_target(abspath, action)
- dirent = Dirent(abspath = abspath,
+ opcode = Dirent.MODIFY
+ abspath = self.index.normpath(abspath)
+ self.__check_writable(opcode)
+ target, origin = self.__find_target(abspath, opcode)
+ dirent = Dirent(origin = origin,
+ abspath = abspath,
version = self.version,
- deleted = 0,
kind = target.kind,
- origin = target.rowid,
- copied = 0,
+ opcode = opcode,
subtree = 0)
- self.__record(dirent, action)
+ self.__record(dirent, replace=True)
def delete(self, abspath):
- action = "replace"
- self.__check_writable(action)
- self.__check_not_root(abspath, action)
- target = self.__find_target(abspath, action)
- dirent = Dirent(abspath = abspath,
+ opcode = Dirent.DELETE
+ abspath = self.index.normpath(abspath)
+ self.__check_writable(opcode)
+ self.__check_not_root(abspath, opcode)
+ target, origin = self.__find_target(abspath, opcode)
+ dirent = Dirent(origin = origin,
+ abspath = abspath,
version = self.version,
- deleted = 1,
kind = target.kind,
- origin = target.rowid,
- copied = 0,
+ opcode = opcode,
subtree = 0)
- self.__record(dirent, action)
- for source in self.index.subtree(abspath, self.version - 1):
- dirent = Dirent(rowid = source.rowid,
- abspath = source.abspath,
- version = self.version,
- deleted = 1,
- kind = source.kind,
- origin = source.rowid,
- copied = 0,
- subtree = 1)
- self.__record(dirent, action)
+ self.__record(dirent, replace=True, purge=True)
+ if target.version < self.version and dirent.kind == Dirent.DIR:
+ for source in self.index.subtree(abspath, self.version - 1):
+ self.__record(Dirent(origin = source.rowid,
+ abspath = source.abspath,
+ version = self.version,
+ kind = source.kind,
+ opcode = opcode,
+ subtree = 1))
def simpletest(database):
ix = Index(database)
ix.initialize()
with Revision(ix, 1) as rev:
- rev.add(u'/A', 0)
- rev.add(u'/A/B', 0)
- rev.add(u'/A/B/c', 1)
+ rev.add(u'/A', Dirent.DIR)
+ rev.add(u'/A/B', Dirent.DIR)
+ rev.add(u'/A/B/c', Dirent.FILE)
with Revision(ix, 2) as rev:
- rev.add(u'/A/B/d', 1)
+ rev.add(u'/A/B/d', Dirent.FILE)
with Revision(ix, 3) as rev:
- rev.add(u'/X', 0, u'/A', 1)
- rev.add(u'/X/B/d', 1, u'/A/B/d', 2)
+ rev.add(u'/X', Dirent.DIR, u'/A', 1)
+ rev.add(u'/X/B/d', Dirent.FILE, u'/A/B/d', 2)
with Revision(ix, 4) as rev:
+ # rev.rename(u'/X/B/d', u'/X/B/x')
rev.delete(u'/X/B/d')
- rev.add(u'/X/B/x', 1, u'/X/B/d', 3)
+ rev.add(u'/X/B/x', Dirent.FILE, u'/X/B/d', 3)
with Revision(ix, 5) as rev:
rev.delete(u'/A')
@@ -474,12 +561,13 @@ def simpletest(database):
print "/A/B/c@4 -> %s@%d" % (dirent.abspath, dirent.version)
for succ in ix.successors(dirent):
print "%11s %s %s@%d" % (
- "", succ.deleted and "x_x" or "-->",
+ "", succ._deleted and "x_x" or "-->",
succ.abspath, succ.version)
ix.close()
def loggedsimpletest(database):
import sys
- logging.basicConfig(level=SQLobject.LOGLEVEL, stream=sys.stderr)
+ logging.basicConfig(level=logging.DEBUG, #SQLobject.LOGLEVEL,
+ stream=sys.stderr)
simpletest(database)
Modified: subversion/trunk/notes/directory-index/logimport.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/logimport.py?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/logimport.py (original)
+++ subversion/trunk/notes/directory-index/logimport.py Fri Mar 16 22:42:01 2012
@@ -16,7 +16,11 @@
# specific language governing permissions and limitations
# under the License.
-# Usage: logimport <database-name> <repoa-url> [path-to-svn]
+# Usage: logimport [options] <database-name> <repos-url>
+# Options:
+# --svn=PATH Use a non-default svn binary
+# --debug Enable debug-level logging to logimport.debug.log
+# --sqldebug Enable SQL-level logging to logimport.sql.log
#
# Converts the history of the repository at <repos-url> into a
# single-tree directory index.
@@ -31,11 +35,11 @@ try:
except ImportError:
from xml.etree.ElementTree import iterparse
-from dirindex import Index, Revision
+from dirindex import Dirent, Index, Revision
def parse(index, stream):
- kindmap = {"dir": 0, "file": 1}
+ kindmap = {"dir": Dirent.DIR, "file": Dirent.FILE}
version = None
revcount = 0
@@ -46,11 +50,11 @@ def parse(index, stream):
version = int(logentry.get("revision"))
revcount += 1
- if revcount == 1 or not revcount % 1000:
- revlogger = logging.info
+ if revcount == 1:
+ logging.info("initial: r%d", version)
else:
- revlogger = logging.debug
- revlogger("%d: r%d", revcount, version)
+ logger = not revcount % 1000 and logging.info or logging.debug
+ logger("%d: r%d", revcount, version)
created = logentry.find("date")
if created is not None:
@@ -77,7 +81,7 @@ def parse(index, stream):
action = path.get("action")
handler, newnode = actionmap[action]
if not newnode:
- logging.debug(" %-s %s", action, abspath)
+ logging.debug(" %s %s", action, abspath)
handler(abspath)
continue
@@ -98,24 +102,83 @@ def parse(index, stream):
def logimport(database, url, svn):
- index = Index(database)
- index.initialize()
- index.cursor.execute("PRAGMA journal_mode = MEMORY")
- index.cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
- index.cursor.execute("PRAGMA synchronous = OFF")
- svnlog = subprocess.Popen(
- [svn, "log", "-v", "--xml", "-r1:HEAD", url],
- stdout = subprocess.PIPE)
- parse(index, svnlog.stdout)
- sys.exit(svnlog.wait())
+ try:
+ index = Index(database)
+ index.cursor.execute("PRAGMA journal_mode = MEMORY")
+ index.cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+ index.cursor.execute("PRAGMA synchronous = OFF")
+ index.cursor.execute("PRAGMA cache_size = -100000")
+ index.initialize()
+ svnlog = subprocess.Popen(
+ [svn, "log", "-v", "--xml", "-r1:HEAD", url],
+ stdout = subprocess.PIPE)
+ parse(index, svnlog.stdout)
+ return svnlog.wait()
+ except:
+ logging.exception("logimport failed")
+ try:
+ svnlog.wait()
+ except:
+ pass
+ return 2
+
+
+def main():
+ import logging.config
+ from optparse import OptionParser
+ from dirindex import SQLobject
+
+ parser = OptionParser("Usage: %prog [options] <database-name> <repoa-url>")
+ parser.add_option("--svn", action="store", default="svn",
+ help="Use a non-default svn binary", metavar="PATH")
+ parser.add_option("--debug", action="store_true", default=False,
+ help="Enable debug-level logging to logimport.debug.log")
+ parser.add_option("--sqldebug", action="store_true", default=False,
+ help="Enable SQL-level logging to logimport.debug.log")
+
+ opts, args = parser.parse_args()
+ if len(args) != 2:
+ parser.error("wrong number of arguments")
+ database, url = args
+
+ logconfig = {
+ "version": 1,
+ "formatters": {
+ "console": {"format": "%(levelname)-7s %(message)s"},
+ "logfile": {"format": "%(asctime)s %(levelname)-7s %(message)s"}},
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "level": logging.INFO,
+ "stream": sys.stderr,
+ "formatter": "console"}},
+ "root": {
+ "level": logging.INFO,
+ "handlers": ["console"]}}
+
+ handlers = logconfig["root"]["handlers"]
+ if opts.debug:
+ logconfig["root"]["level"] = logging.DEBUG
+ logconfig["handlers"]["debug"] = {
+ "class": "logging.FileHandler",
+ "level": logging.DEBUG,
+ "mode": "w",
+ "filename": "./logimport.debug.log",
+ "formatter": "logfile"}
+ handlers.append("debug")
+ if opts.sqldebug:
+ logconfig["root"]["level"] = SQLobject.LOGLEVEL
+ logconfig["handlers"]["sqldebug"] = {
+ "class": "logging.FileHandler",
+ "level": SQLobject.LOGLEVEL,
+ "mode": "w",
+ "filename": "./logimport.sql.log",
+ "formatter": "logfile"}
+ handlers.append("sqldebug")
+
+ logging.config.dictConfig(logconfig)
+ sys.exit(logimport(database, url, opts.svn))
if __name__ == "__main__":
- database = sys.argv[1]
- url = sys.argv[2]
- if len(sys.argv) > 3:
- svn = sys.argv[3]
- else:
- svn = "svn"
- logging.basicConfig(level=logging.INFO, stream=sys.stderr)
- logimport(database, url, svn)
+ main()
Modified: subversion/trunk/notes/directory-index/schema.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/schema.sql?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/schema.sql (original)
+++ subversion/trunk/notes/directory-index/schema.sql Fri Mar 16 22:42:01 2012
@@ -16,6 +16,15 @@
-- specific language governing permissions and limitations
-- under the License.
+
+---SCRIPT CREATE_SCHEMA
+
+DROP TABLE IF EXISTS dirindex;
+DROP TABLE IF EXISTS strindex;
+DROP TABLE IF EXISTS revision;
+
+-- Revision record
+
CREATE TABLE revision (
version integer NOT NULL PRIMARY KEY,
created timestamp NOT NULL,
@@ -23,73 +32,152 @@ CREATE TABLE revision (
log varchar NULL
);
-CREATE TABLE pathindex (
- pathid integer NOT NULL PRIMARY KEY,
- abspath varchar NOT NULL UNIQUE
+-- Path lookup table
+
+CREATE TABLE strindex (
+ strid integer NOT NULL PRIMARY KEY,
+ content varchar NOT NULL UNIQUE
);
+-- Versioned directory tree
+
CREATE TABLE dirindex (
-- unique id of this node revision, used for
-- predecessor/successor links
rowid integer NOT NULL PRIMARY KEY,
+ -- link to this node's immediate predecessor
+ origin integer NULL REFERENCES dirindex(rowid),
+
-- absolute (repository) path
- pathid integer NOT NULL REFERENCES pathindex(pathid),
+ pathid integer NOT NULL REFERENCES strindex(strid),
-- revision number
version integer NOT NULL REFERENCES revision(version),
- -- node deletion flag
- deleted boolean NOT NULL,
-
- -- node kind (0 = dir, 1 = file, etc.)
- kind integer NOT NULL,
-
- -- predecessor link
- origin integer NULL REFERENCES dirindex(rowid),
+ -- node kind (D = dir, F = file, etc.)
+ kind character(1) NOT NULL,
- -- the predecessor is a copy source
- copied boolean NOT NULL,
+ -- the operation that produced this entry:
+ -- A = add, R = replace, M = modify, D = delete, N = rename
+ opcode character(1) NOT NULL,
-- the index entry is the result of an implicit subtree operation
subtree boolean NOT NULL
);
CREATE UNIQUE INDEX dirindex_versioned_tree ON dirindex(pathid, version DESC);
CREATE INDEX dirindex_successor_list ON dirindex(origin);
-CREATE INDEX dirindex_deleted ON dirindex(deleted);
+CREATE INDEX dirindex_operation ON dirindex(opcode);
+
+-- Repository root
--- repository root
INSERT INTO revision (version, created, author, log)
VALUES (0, 'EPOCH', NULL, NULL);
-INSERT INTO pathindex (pathid, abspath) VALUES (0, '/');
-INSERT INTO dirindex (rowid, pathid, version, deleted,
- kind, origin, copied, subtree)
- VALUES (0, 0, 0, 0, 0, NULL, 0, 0);
-
-
--- lookup PATH@REVISION
-
-SELECT
- dirindex.*, pathindex.abspath
-FROM dirindex JOIN pathindex
- ON dirindex.pathid = pathindex.pathid
-WHERE
- pathindex.abspath = '' -- $PATH
- AND dirindex.version <= 0 -- $REVISION
-ORDER BY pathindex.abspath ASC, dirindex.version DESC
-LIMIT 1; -- then check dirindex.deleted
-
--- single-revision tree for REVISION
-
-SELECT
- dirindex.*, pathindex.abspath
-FROM dirindex JOIN pathindex
- ON dirindex.pathid = pathindex.pathid
+INSERT INTO strindex (strid, content) VALUES (0, '/');
+INSERT INTO dirindex (rowid, origin, pathid, version, kind, opcode, subtree)
+ VALUES (0, NULL, 0, 0, 'D', 'A', 0);
+
+
+---STATEMENT INSERT_REVISION_RECORD
+
+INSERT INTO revision (version, created, author, log)
+ VALUES (?, ?, ?, ?);
+
+---STATEMENT GET_REVENT_BY_VERSION
+
+SELECT * FROM revision WHERE version = ?;
+
+---STATEMENT INSERT_STRINDEX_RECORD
+
+INSERT INTO strindex (content) VALUES (?);
+
+---STATEMENT GET_STRENT_BY_STRID
+
+SELECT * FROM strindex WHERE strid = ?;
+
+---STATEMENT GET_STRENT_BY_CONTENT
+
+SELECT * FROM strindex WHERE content = ?;
+
+---STATEMENT INSERT_DIRINDEX_RECORD
+
+INSERT INTO dirindex (origin, pathid, version, kind, opcode, subtree)
+ VALUES (?, ?, ?, ?, ?, ?);
+
+---STATEMENT GET_DIRENT_BY_ROWID
+
+SELECT dirindex.*, strindex.content FROM dirindex
+ JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE dirindex.rowid = ?;
+
+---STATEMENT GET_DIRENT_BY_ABSPATH_AND_VERSION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+ JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE abspath = ? AND dirindex.version = ?;
+
+---STATEMENT LOOKUP_ABSPATH_AT_REVISION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+ JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE abspath = ? AND dirindex.version <= ?
+ORDER BY abspath ASC, dirindex.version DESC
+LIMIT 1;
+
+---STATEMENT LIST_SUBTREE_AT_REVISION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+ JOIN strindex ON dirindex.pathid = strindex.strid
JOIN (SELECT pathid, MAX(version) AS maxver FROM dirindex
- WHERE version <= 0 -- $REVISION
- GROUP BY pathid)
- AS filtered
+ WHERE version <= ? GROUP BY pathid)
+ AS filtered
ON dirindex.pathid == filtered.pathid
- AND dirindex.version == filtered.maxver
-WHERE NOT dirindex.deleted
-ORDER BY pathindex.abspath ASC;
+ AND dirindex.version == filtered.maxver
+WHERE abspath LIKE ? ESCAPE '#'
+ AND dirindex.opcode <> 'D'
+ORDER BY abspath ASC;
+
+---STATEMENT LIST_DIRENT_SUCCESSORS
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+ JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE dirindex.origin = ?
+ORDER BY abspath ASC, dirindex.version ASC;
+
+
+-- Temporary transaction
+
+---SCRIPT CREATE_TRANSACTION_CONTEXT
+
+CREATE TEMPORARY TABLE txncontext (
+ origin integer NULL,
+ abspath varchar NOT NULL UNIQUE,
+ kind character(1) NOT NULL,
+ opcode character(1) NOT NULL,
+ subtree boolean NOT NULL
+);
+
+---SCRIPT REMOVE_TRANSACTION_CONTEXT
+
+DROP TABLE IF EXISTS temp.txncontext;
+
+---STATEMENT INSERT_TRANSACTION_RECORD
+
+INSERT INTO temp.txncontext (origin, abspath, kind, opcode, subtree)
+ VALUES (?, ?, ?, ?, ?);
+
+---STATEMENT GET_TRANSACTION_RECORD
+
+SELECT * FROM temp.txncontext WHERE abspath = ?;
+
+---STATEMENT REMOVE_TRANSACTION_RECORD
+
+DELETE FROM temp.txncontext WHERE abspath = ?;
+
+---STATEMENT REMOVE_TRANSACTION_SUBTREE
+
+DELETE FROM temp.txncontext WHERE abspath LIKE ? ESCAPE '#';
+
+---STATEMENT LIST_TRANSACTION_RECORDS
+
+SELECT * FROM temp.txncontext ORDER BY abspath ASC;