You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2012/03/16 23:42:02 UTC

svn commit: r1301806 - in /subversion/trunk/notes/directory-index: dirindex.py logimport.py schema.sql

Author: brane
Date: Fri Mar 16 22:42:01 2012
New Revision: 1301806

URL: http://svn.apache.org/viewvc?rev=1301806&view=rev
Log:
The Great Refactoring.

* notes/directory-index/schema.sql:
  Rename "pathindex" to the more generic "strindex".
  Record tree-change operations in the directory index.
  Define all interesting queries here.
* notes/directory-index/dirindex.py:
  Parse schema.sql to load schema and queries.
  Use a temporary (in-memory) table to build the tree transaction;
  this is orders of magnitude faster than using Python dicts and lists,
  and also quite a bit kinder to the memory manager.
* notes/directory-index/logimport.py:
  Better logging configuration and error handling.
  Use proper option parsing.

Modified:
    subversion/trunk/notes/directory-index/dirindex.py
    subversion/trunk/notes/directory-index/logimport.py
    subversion/trunk/notes/directory-index/schema.sql

Modified: subversion/trunk/notes/directory-index/dirindex.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/dirindex.py?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/dirindex.py (original)
+++ subversion/trunk/notes/directory-index/dirindex.py Fri Mar 16 22:42:01 2012
@@ -23,10 +23,64 @@ import sqlite3
 
 
 class Error(Exception):
-    pass
+    def __init__(self, msg, *args, **kwargs):
+        opcode = kwargs.pop("action", None)
+        if opcode is not None:
+            msg = Dirent._opname(opcode) + msg
+        super(Error, self).__init__(msg, *args, **kwargs)
+
+
+class SQL(object):
+    """Named index of SQL schema definitions and statements.
+
+    Parses "schema.sql" and creates a class-level attribute for each
+    script and statement in that file.
+    """
+
+    @classmethod
+    def _load_statements(cls):
+        import cStringIO
+        import pkgutil
+        import re
+
+        comment_rx = re.compile(r"\s*--.*$")
+        header_rx = re.compile(r"^---(STATEMENT|SCRIPT)"
+                               r"\s+(?P<name>[_A-Z]+)$")
+
+        name = None
+        content = None
+
+        def record_current_statement():
+            if name is not None:
+                setattr(cls, name, content.getvalue())
+
+        schema = cStringIO.StringIO(pkgutil.get_data(__name__, "schema.sql"))
+        for line in schema:
+            line = line.rstrip()
+            if not line:
+                continue
+
+            header = header_rx.match(line)
+            if header:
+                record_current_statement()
+                name = header.group("name")
+                content = cStringIO.StringIO()
+                continue
+
+            line = comment_rx.sub("", line)
+            if not line:
+                continue
+
+            if content is not None:
+                content.write(line)
+                content.write("\n")
+        record_current_statement()
+SQL._load_statements()
 
 
 class SQLobject(object):
+    """Base for ORM abstractions."""
+
     __slots__ = ()
     def __init__(self, **kwargs):
         for name, val in kwargs.items():
@@ -68,91 +122,105 @@ class SQLobject(object):
 
 
 class Revent(SQLobject):
+    """O/R mapping for the "revision" table."""
+
     __slots__ = ("version", "created", "author", "log")
 
     def _put(self, cursor):
         if self.created is None:
             now = datetime.datetime.utcnow()
             self.created = now.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
-        self._execute(cursor,
-                      "INSERT INTO revision (version, created, author, log)"
-                      " VALUES (?, ?, ?, ?)",
+        self._execute(cursor, SQL.INSERT_REVISION_RECORD,
                       [self.version, self.created, self.author, self.log])
 
     @classmethod
     def _get(cls, cursor, pkey):
-        cursor.execute("SELECT * FROM revision WHERE version = ?", [pkey])
+        cursor.execute(SQL.GET_REVENT_BY_VERSION, [pkey])
         return cls._from_row(cursor.fetchone())
 
 
-class Pathent(SQLobject):
-    __slots__ = ("pathid", "abspath")
+class Strent(SQLobject):
+    """O/R mapping for the "strindex" table."""
+
+    __slots__ = ("strid", "content")
 
     def _put(self, cursor):
-        self._execute(cursor,
-                      "INSERT INTO pathindex (abspath) VALUES (?)",
-                      [self.abspath])
-        self.pathid = cursor.lastrowid
+        self._execute(cursor, SQL.INSERT_STRINDEX_RECORD, [self.content])
+        self.strid = cursor.lastrowid
 
     @classmethod
     def _get(cls, cursor, pkey):
-        cls._execute(cursor,
-                     "SELECT * FROM pathindex WHERE pathid = ?",
-                     [pkey])
+        cls._execute(cursor, SQL.GET_STRENT_BY_STRID, [pkey])
         return cls._from_row(cursor.fetchone())
 
     @classmethod
-    def _find(cls, cursor, abspath):
-        cls._execute(cursor,
-                     "SELECT * FROM pathindex WHERE abspath = ?",
-                     [abspath])
+    def _find(cls, cursor, content):
+        cls._execute(cursor, SQL.GET_STRENT_BY_CONTENT, [content])
         return cls._from_row(cursor.fetchone())
 
 
 class Dirent(SQLobject):
-    __slots__ = ("rowid", "pathid", "version", "deleted",
-                 "kind", "origin", "copied", "subtree",
+    """O/R mapping for a virtual non-materialized view representing
+    a natural join of the "dirindex" and "strindex" tables."""
+
+    __slots__ = ("rowid", "origin", "pathid", "version",
+                 "kind", "opcode", "subtree",
                  "abspath")
 
+    # Kinds
+    DIR = "D"
+    FILE = "F"
+
+    # Opcodes
+    ADD = "A"
+    REPLACE = "R"
+    MODIFY = "M"
+    DELETE = "D"
+    RENAME = "N"
+
+    # Opcode names
+    __opnames = {ADD: "add",
+                 REPLACE: "replace",
+                 MODIFY: "modify",
+                 DELETE: "delete",
+                 RENAME: "rename"}
+
+    @classmethod
+    def _opname(cls, opcode):
+        return cls.__opnames.get(opcode)
+
+    @property
+    def _deleted(self):
+        return (self.opcode == self.DELETE)
+
     def __str__(self):
-        return "%3d %c%c %s" % (
+        return "%d %c%c%c %c %s" % (
             self.version,
-            self.deleted and "x" or " ",
-            self.kind and "f" or "d",
-            self.abspath)
+            self.subtree and "(" or " ",
+            self.opcode,
+            self.subtree and ")" or " ",
+            self.kind, self.abspath)
 
     def _put(self, cursor):
-        pathent = Pathent._find(cursor, self.abspath)
-        if pathent is None:
-            pathent = Pathent(abspath = self.abspath)
-            pathent._put(cursor)
-        self._execute(cursor,
-                      "INSERT INTO dirindex"
-                      " (pathid, version, deleted,"
-                      " kind, origin, copied, subtree)"
-                      " VALUES (?, ?, ?, ?, ?, ?, ?)",
-                      [pathent.pathid, self.version, self.deleted,
-                       self.kind, self.origin, self.copied, self.subtree])
+        strent = Strent._find(cursor, self.abspath)
+        if strent is None:
+            strent = Strent(content = self.abspath)
+            strent._put(cursor)
+        self._execute(cursor, SQL.INSERT_DIRINDEX_RECORD,
+                      [self.origin, strent.strid, self.version,
+                       self.kind, self.opcode,self.subtree])
         self.rowid = cursor.lastrowid
-        self.pathid = pathent.pathid
+        self.pathid = strent.strid
 
     @classmethod
     def _get(cls, cursor, pkey):
-        cls._execute(cursor,
-                     "SELECT dirindex.*, pathindex.abspath"
-                     " FROM dirindex JOIN pathindex"
-                     " ON dirindex.pathid = pathindex.pathid"
-                     " WHERE dirindex.rowid = ?", [pkey])
+        cls._execute(cursor, SQL.GET_DIRENT_BY_ROWID, [pkey])
         return cls._from_row(cursor.fetchone())
 
     @classmethod
     def _find(cls, cursor, abspath, version):
         cls._execute(cursor,
-                     "SELECT dirindex.*, pathindex.abspath"
-                     " FROM dirindex JOIN pathindex"
-                     " ON dirindex.pathid = pathindex.pathid"
-                     " WHERE pathindex.abspath = ?"
-                     " AND dirindex.version = ?",
+                     SQL.GET_DIRENT_BY_ABSPATH_AND_VERSION,
                      [abspath, version])
         return cls._from_row(cursor.fetchone())
 
@@ -162,53 +230,27 @@ class Index(object):
         self.conn = sqlite3.connect(database, isolation_level = "IMMEDIATE")
         self.conn.row_factory = sqlite3.Row
         self.cursor = self.conn.cursor()
+        self.cursor.execute("PRAGMA page_size = 4096")
+        self.cursor.execute("PRAGMA temp_store = MEMORY")
         self.cursor.execute("PRAGMA foreign_keys = ON")
         self.cursor.execute("PRAGMA case_sensitive_like = ON")
         self.cursor.execute("PRAGMA encoding = 'UTF-8'")
 
-    __schema = """
-DROP TABLE IF EXISTS dirindex;
-DROP TABLE IF EXISTS pathindex;
-DROP TABLE IF EXISTS revision;
-
-CREATE TABLE revision (
-  version integer NOT NULL PRIMARY KEY,
-  created timestamp NOT NULL,
-  author  varchar NULL,
-  log     varchar NULL
-);
-
-CREATE TABLE pathindex (
-  pathid  integer NOT NULL PRIMARY KEY,
-  abspath varchar NOT NULL UNIQUE
-);
-
-CREATE TABLE dirindex (
-  rowid   integer NOT NULL PRIMARY KEY,
-  pathid  integer NOT NULL REFERENCES pathindex(pathid),
-  version integer NOT NULL REFERENCES revision(version),
-  deleted boolean NOT NULL,
-  kind    integer NOT NULL,
-  origin  integer NULL REFERENCES dirindex(rowid),
-  copied  boolean NOT NULL,
-  subtree boolean NOT NULL
-);
-CREATE UNIQUE INDEX dirindex_versioned_tree ON dirindex(pathid, version DESC);
-CREATE INDEX dirindex_successor_list ON dirindex(origin);
-CREATE INDEX dirindex_deleted ON dirindex(deleted);
-
-INSERT INTO revision (version, created, author, log)
-  VALUES (0, 'EPOCH', NULL, NULL);
-INSERT INTO pathindex (pathid, abspath) VALUES (0, '/');
-INSERT INTO dirindex (rowid, pathid, version, deleted,
-                      kind, origin, copied, subtree)
-  VALUES (0, 0, 0, 0, 0, NULL, 0, 0);
-"""
+    @staticmethod
+    def normpath(abspath):
+        return abspath.rstrip("/")
+
+    @staticmethod
+    def subtree_pattern(abspath):
+        return (abspath.rstrip("/")
+                .replace("#", "##")
+                .replace("%", "#%")
+                .replace("_", "#_")) + "/%"
 
     def initialize(self):
         try:
-            SQLobject._log("%s", self.__schema)
-            self.cursor.executescript(self.__schema)
+            SQLobject._log("%s", SQL.CREATE_SCHEMA)
+            self.cursor.executescript(SQL.CREATE_SCHEMA)
             self.commit()
         finally:
             self.rollback()
@@ -245,34 +287,20 @@ INSERT INTO dirindex (rowid, pathid, ver
     def lookup(self, abspath, version):
         SQLobject._execute(
             self.cursor,
-            "SELECT dirindex.*, pathindex.abspath FROM dirindex"
-            " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
-            " WHERE pathindex.abspath = ? AND dirindex.version <= ?"
-            " ORDER BY pathindex.abspath ASC, dirindex.version DESC"
-            " LIMIT 1",
+            SQL.LOOKUP_ABSPATH_AT_REVISION,
             [abspath, version])
         row = self.cursor.fetchone()
-        if row is not None and not row["deleted"]:
-            return Dirent._from_row(row)
+        if row is not None:
+            dirent = Dirent._from_row(row)
+            if not dirent._deleted:
+                return dirent
         return None
 
     def subtree(self, abspath, version):
-        pattern = (abspath.rstrip("/")
-                   .replace("#", "##")
-                   .replace("%", "#%")
-                   .replace("_", "#_")) + "/%"
         SQLobject._execute(
             self.cursor,
-            "SELECT dirindex.*, pathindex.abspath FROM dirindex"
-            " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
-            " JOIN (SELECT pathid, MAX(version) AS maxver FROM dirindex"
-            " WHERE version <= ? GROUP BY pathid) AS filtered"
-            " ON dirindex.pathid == filtered.pathid"
-            " AND dirindex.version == filtered.maxver"
-            " WHERE pathindex.abspath LIKE ? ESCAPE '#'"
-            " AND NOT dirindex.deleted"
-            " ORDER BY pathindex.abspath ASC",
-            [version, pattern])
+            SQL.LIST_SUBTREE_AT_REVISION,
+            [version, self.subtree_pattern(abspath)])
         for row in self.cursor:
             yield Dirent._from_row(row)
 
@@ -286,10 +314,7 @@ INSERT INTO dirindex (rowid, pathid, ver
         assert isinstance(dirent, Dirent)
         SQLobject._execute(
             self.cursor,
-            "SELECT dirindex.*, pathindex.abspath FROM dirindex"
-            " JOIN pathindex ON dirindex.pathid = pathindex.pathid"
-            " WHERE dirindex.origin = ?"
-            " ORDER BY pathindex.abspath ASC, dirindex.version ASC",
+            SQL.LIST_DIRENT_SUCCESSORS,
             [dirent.rowid])
         for row in self.cursor:
             yield Dirent._from_row(row)
@@ -304,160 +329,222 @@ class Revision(object):
         self.__created = created
         self.__author = author
         self.__log = log
-        self.__context = None
+        self.__txn = None
+        self.__deferred = None
         index.rollback()
 
+    class __Context(object):
+        def __init__(self, version, connection):
+            self.version = version
+            self.conn = connection
+            self.cursor = connection.cursor()
+            SQLobject._execute(self.cursor, SQL.CREATE_TRANSACTION_CONTEXT)
+
+        def clear(self):
+            SQLobject._execute(self.cursor, SQL.REMOVE_TRANSACTION_CONTEXT)
+
+        def __iter__(self):
+            SQLobject._execute(self.cursor, SQL.LIST_TRANSACTION_RECORDS)
+            for row in self.cursor:
+                dirent = Dirent._from_row(row)
+                dirent.version = self.version
+                yield dirent
+
+        def lookup(self, abspath):
+            SQLobject._execute(self.cursor,
+                               SQL.GET_TRANSACTION_RECORD,
+                               [abspath])
+            row = self.cursor.fetchone()
+            if row is not None:
+                dirent = Dirent._from_row(row)
+                dirent.version = self.version
+                return dirent
+            return None
+
+        def remove(self, abspath, purge=False):
+            target = self.lookup(abspath)
+            if not target:
+                raise Error("txn context: remove nonexistent " + abspath)
+            logging.debug("txn context: remove %s", abspath)
+            SQLobject._execute(self.cursor,
+                               SQL.REMOVE_TRANSACTION_RECORD,
+                               [abspath])
+            if purge:
+                logging.debug("txn context: purge %s/*", abspath)
+                SQLobject._execute(self.cursor,
+                                   SQL.REMOVE_TRANSACTION_SUBTREE,
+                                   [Index.subtree_pattern(abspath)])
+
+        def record(self, dirent, replace=False, purge=False):
+            target = self.lookup(dirent.abspath)
+            if target is not None:
+                if not replace:
+                    raise Error("txn context: record existing "
+                                + dirent.abspath)
+                elif not target.subtree:
+                    raise Error("txn context: replace conflict "
+                                + dirent.abspath)
+                self.remove(target.abspath, purge and target.kind == Dirent.DIR)
+            SQLobject._execute(self.cursor,
+                               SQL.INSERT_TRANSACTION_RECORD,
+                               [dirent.origin, dirent.abspath,
+                                dirent.kind, dirent.opcode, dirent.subtree])
+
     def __enter__(self):
         if self.revent is not None:
             raise Error("revision is read-only")
-        SQLobject._log("BEGIN")
+        self.__context = self.__Context(self.version, self.index.conn)
+        SQLobject._execute(self.index.cursor, "BEGIN")
         self.revent = self.index.new_revision(
             self.version, self.__created, self.__author, self.__log)
-        self.__context = {}
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         try:
-            if exc_type is None and len(self.__context):
-                for dirent in sorted(self.__context.itervalues()):
+            if exc_type is None:
+                for dirent in self.__context:
                     self.index.insert(dirent)
+                    logging.debug("insert: %s", dirent)
                 self.index.commit()
+            else:
+                self.index.rollback()
         except:
             self.index.rollback()
             raise
         finally:
+            self.__context.clear()
             self.__context = None
 
-    def __record(self, dirent, action):
-        self.__context[dirent.abspath] = dirent
-        if dirent.subtree:
-            action = "(%s)" % action
-        else:
-            action = " %s " % action
-        logging.debug(" %-9s %s", action, dirent)
+    def __record(self, dirent, replace=False, purge=False):
+        self.__context.record(dirent, replace, purge)
+        logging.debug("record: %s", dirent)
 
-    def __check_writable(self, action):
+    def __check_writable(self, opcode):
         if self.__context is None:
-            raise Error(action + " requires a transaction")
+            raise Error(" requires a transaction", action=opcode)
 
-    def __check_not_root(self, abspath, action):
+    def __check_not_root(self, abspath, opcode):
         if abspath.rstrip("/") == "":
-            raise Error(action + " not allowed on /")
+            raise Error(" not allowed on /", action=opcode)
 
-    def __find_target(self, abspath, action):
-        target = self.__context.get(abspath)
-        if target is not None and not target.subtree:
-            raise Error(action + " overrides explicit " + abspath)
-        if target is None:
-            target = self.index.lookup(abspath, self.version - 1)
+    def __find_target(self, abspath, opcode):
+        target = self.__context.lookup(abspath)
+        if target is not None:
+            if not target.subtree:
+                raise Error(" overrides explicit " + abspath, action=opcode)
+            return target, target.origin
+        target = self.index.lookup(abspath, self.version - 1)
         if target is None:
-            raise Error(action + " target does not exist: " + abspath)
-        return target
+            raise Error(" target does not exist: " + abspath, action=opcode)
+        return target, target.rowid
 
     def lookup(self, abspath):
         try:
-            return self.index.lookup(abspath, self.version)
+            return self.index.lookup(self.index.normpath(abspath),
+                                     self.version)
         finally:
             if self.__context is None:
                 self.index.rollback()
 
-    def __add(self, action, abspath, kind, frompath, fromver):
+    def __add(self, opcode, abspath, kind, frompath, fromver):
         origin = None
         if frompath is not None:
+            frompath = self.index.normpath(frompath)
+            fromver = int(fromver)
             origin = self.index.lookup(frompath, fromver)
             if origin is None:
-                raise Error(action + " source does not exist: " + frompath)
+                raise Error(" source does not exist: " + frompath, action=opcode)
             if origin.kind != kind:
-                raise Error(action + " changes the source object kind")
+                raise Error(" changes the source object kind", action=opcode)
             origin = origin.rowid
-        dirent = Dirent(abspath = abspath,
+        dirent = Dirent(origin = origin,
+                        abspath = abspath,
                         version = self.version,
-                        deleted = 0,
                         kind = kind,
-                        origin = origin,
-                        copied = int(origin is not None),
+                        opcode = opcode,
                         subtree = 0)
-        self.__record(dirent, action)
-        if frompath is not None:
-            offset = len(frompath.rstrip("/"))
-            prefix = abspath.rstrip("/")
-            for source in self.index.subtree(frompath, fromver):
-                dirent = Dirent(rowid = source.rowid,
-                                abspath = prefix + source.abspath[offset:],
-                                version = self.version,
-                                deleted = 0,
-                                kind = source.kind,
-                                origin = source.rowid,
-                                copied = 1,
-                                subtree = 1)
-                self.__record(dirent, action)
+        self.__record(dirent,
+                      replace=(opcode == Dirent.REPLACE),
+                      purge=(opcode == Dirent.REPLACE))
+        if frompath is not None and dirent.kind == Dirent.DIR:
+            prefix = dirent.abspath
+            offset = len(frompath)
+            for source in list(self.index.subtree(frompath, fromver)):
+                abspath = prefix + source.abspath[offset:]
+                self.__record(Dirent(origin = source.rowid,
+                                     abspath = abspath,
+                                     version = self.version,
+                                     kind = source.kind,
+                                     opcode = opcode,
+                                     subtree = 1))
 
     def add(self, abspath, kind, frompath=None, fromver=None):
-        action = "add"
-        self.__check_writable(action)
-        self.__check_not_root(abspath, action)
-        return self.__add(action, abspath, kind, frompath, fromver)
+        opcode = Dirent.ADD
+        abspath = self.index.normpath(abspath)
+        self.__check_writable(opcode)
+        self.__check_not_root(abspath, opcode)
+        return self.__add(opcode, abspath, kind, frompath, fromver)
 
     def replace(self, abspath, kind, frompath=None, fromver=None):
-        action = "replace"
-        self.__check_writable(action)
-        self.__check_not_root(abspath, action)
-        self.__find_target(abspath, action)
-        return self.__add(action, abspath, kind, frompath, fromver)
+        opcode = Dirent.REPLACE
+        abspath = self.index.normpath(abspath)
+        self.__check_writable(opcode)
+        self.__check_not_root(abspath, opcode)
+        self.__find_target(abspath, opcode)
+        return self.__add(opcode, abspath, kind, frompath, fromver)
 
     def modify(self, abspath):
-        action = "modify"
-        self.__check_writable(action)
-        target = self.__find_target(abspath, action)
-        dirent = Dirent(abspath = abspath,
+        opcode = Dirent.MODIFY
+        abspath = self.index.normpath(abspath)
+        self.__check_writable(opcode)
+        target, origin = self.__find_target(abspath, opcode)
+        dirent = Dirent(origin = origin,
+                        abspath = abspath,
                         version = self.version,
-                        deleted = 0,
                         kind = target.kind,
-                        origin = target.rowid,
-                        copied = 0,
+                        opcode = opcode,
                         subtree = 0)
-        self.__record(dirent, action)
+        self.__record(dirent, replace=True)
 
     def delete(self, abspath):
-        action = "replace"
-        self.__check_writable(action)
-        self.__check_not_root(abspath, action)
-        target = self.__find_target(abspath, action)
-        dirent = Dirent(abspath = abspath,
+        opcode = Dirent.DELETE
+        abspath = self.index.normpath(abspath)
+        self.__check_writable(opcode)
+        self.__check_not_root(abspath, opcode)
+        target, origin = self.__find_target(abspath, opcode)
+        dirent = Dirent(origin = origin,
+                        abspath = abspath,
                         version = self.version,
-                        deleted = 1,
                         kind = target.kind,
-                        origin = target.rowid,
-                        copied = 0,
+                        opcode = opcode,
                         subtree = 0)
-        self.__record(dirent, action)
-        for source in self.index.subtree(abspath, self.version - 1):
-            dirent = Dirent(rowid = source.rowid,
-                            abspath = source.abspath,
-                            version = self.version,
-                            deleted = 1,
-                            kind = source.kind,
-                            origin = source.rowid,
-                            copied = 0,
-                            subtree = 1)
-            self.__record(dirent, action)
+        self.__record(dirent, replace=True, purge=True)
+        if target.version < self.version and dirent.kind == Dirent.DIR:
+            for source in self.index.subtree(abspath, self.version - 1):
+                self.__record(Dirent(origin = source.rowid,
+                                     abspath = source.abspath,
+                                     version = self.version,
+                                     kind = source.kind,
+                                     opcode = opcode,
+                                     subtree = 1))
 
 
 def simpletest(database):
     ix = Index(database)
     ix.initialize()
     with Revision(ix, 1) as rev:
-        rev.add(u'/A', 0)
-        rev.add(u'/A/B', 0)
-        rev.add(u'/A/B/c', 1)
+        rev.add(u'/A', Dirent.DIR)
+        rev.add(u'/A/B', Dirent.DIR)
+        rev.add(u'/A/B/c', Dirent.FILE)
     with Revision(ix, 2) as rev:
-        rev.add(u'/A/B/d', 1)
+        rev.add(u'/A/B/d', Dirent.FILE)
     with Revision(ix, 3) as rev:
-        rev.add(u'/X', 0, u'/A', 1)
-        rev.add(u'/X/B/d', 1, u'/A/B/d', 2)
+        rev.add(u'/X', Dirent.DIR, u'/A', 1)
+        rev.add(u'/X/B/d', Dirent.FILE, u'/A/B/d', 2)
     with Revision(ix, 4) as rev:
+        # rev.rename(u'/X/B/d', u'/X/B/x')
         rev.delete(u'/X/B/d')
-        rev.add(u'/X/B/x', 1, u'/X/B/d', 3)
+        rev.add(u'/X/B/x', Dirent.FILE, u'/X/B/d', 3)
     with Revision(ix, 5) as rev:
         rev.delete(u'/A')
 
@@ -474,12 +561,13 @@ def simpletest(database):
     print "/A/B/c@4 -> %s@%d" % (dirent.abspath, dirent.version)
     for succ in ix.successors(dirent):
         print "%11s %s %s@%d" % (
-            "", succ.deleted and "x_x" or "-->",
+            "", succ._deleted and "x_x" or "-->",
             succ.abspath, succ.version)
 
     ix.close()
 
 def loggedsimpletest(database):
     import sys
-    logging.basicConfig(level=SQLobject.LOGLEVEL, stream=sys.stderr)
+    logging.basicConfig(level=logging.DEBUG, #SQLobject.LOGLEVEL,
+                        stream=sys.stderr)
     simpletest(database)

Modified: subversion/trunk/notes/directory-index/logimport.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/logimport.py?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/logimport.py (original)
+++ subversion/trunk/notes/directory-index/logimport.py Fri Mar 16 22:42:01 2012
@@ -16,7 +16,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Usage: logimport <database-name> <repoa-url> [path-to-svn]
+# Usage: logimport [options] <database-name> <repos-url>
+# Options:
+#   --svn=PATH     Use a non-default svn binary
+#   --debug        Enable debug-level logging to logimport.debug.log
+#   --sqldebug     Enable SQL-level logging to logimport.sql.log
 #
 # Converts the history of the repository at <repos-url> into a
 # single-tree directory index.
@@ -31,11 +35,11 @@ try:
 except ImportError:
     from xml.etree.ElementTree import iterparse
 
-from dirindex import Index, Revision
+from dirindex import Dirent, Index, Revision
 
 
 def parse(index, stream):
-    kindmap = {"dir": 0, "file": 1}
+    kindmap = {"dir": Dirent.DIR, "file": Dirent.FILE}
 
     version = None
     revcount = 0
@@ -46,11 +50,11 @@ def parse(index, stream):
         version = int(logentry.get("revision"))
 
         revcount += 1
-        if revcount == 1 or not revcount % 1000:
-            revlogger = logging.info
+        if revcount == 1:
+            logging.info("initial: r%d", version)
         else:
-            revlogger = logging.debug
-        revlogger("%d: r%d", revcount, version)
+            logger = not revcount % 1000 and logging.info or logging.debug
+            logger("%d: r%d", revcount, version)
 
         created = logentry.find("date")
         if created is not None:
@@ -77,7 +81,7 @@ def parse(index, stream):
                 action = path.get("action")
                 handler, newnode = actionmap[action]
                 if not newnode:
-                    logging.debug("  %-s      %s", action, abspath)
+                    logging.debug("  %s      %s", action, abspath)
                     handler(abspath)
                     continue
 
@@ -98,24 +102,83 @@ def parse(index, stream):
 
 
 def logimport(database, url, svn):
-    index = Index(database)
-    index.initialize()
-    index.cursor.execute("PRAGMA journal_mode = MEMORY")
-    index.cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
-    index.cursor.execute("PRAGMA synchronous = OFF")
-    svnlog = subprocess.Popen(
-        [svn, "log", "-v", "--xml", "-r1:HEAD", url],
-        stdout = subprocess.PIPE)
-    parse(index, svnlog.stdout)
-    sys.exit(svnlog.wait())
+    try:
+        index = Index(database)
+        index.cursor.execute("PRAGMA journal_mode = MEMORY")
+        index.cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+        index.cursor.execute("PRAGMA synchronous = OFF")
+        index.cursor.execute("PRAGMA cache_size = -100000")
+        index.initialize()
+        svnlog = subprocess.Popen(
+            [svn, "log", "-v", "--xml", "-r1:HEAD", url],
+            stdout = subprocess.PIPE)
+        parse(index, svnlog.stdout)
+        return svnlog.wait()
+    except:
+        logging.exception("logimport failed")
+        try:
+            svnlog.wait()
+        except:
+            pass
+        return 2
+
+
+def main():
+    import logging.config
+    from optparse import OptionParser
+    from dirindex import SQLobject
+
+    parser = OptionParser("Usage: %prog [options] <database-name> <repoa-url>")
+    parser.add_option("--svn", action="store", default="svn",
+                      help="Use a non-default svn binary", metavar="PATH")
+    parser.add_option("--debug", action="store_true", default=False,
+                      help="Enable debug-level logging to logimport.debug.log")
+    parser.add_option("--sqldebug", action="store_true", default=False,
+                      help="Enable SQL-level logging to logimport.debug.log")
+
+    opts, args = parser.parse_args()
+    if len(args) != 2:
+        parser.error("wrong number of arguments")
+    database, url = args
+
+    logconfig = {
+        "version": 1,
+        "formatters": {
+            "console": {"format": "%(levelname)-7s %(message)s"},
+            "logfile": {"format": "%(asctime)s %(levelname)-7s %(message)s"}},
+        "handlers": {
+            "console": {
+                "class": "logging.StreamHandler",
+                "level": logging.INFO,
+                "stream": sys.stderr,
+                "formatter": "console"}},
+        "root": {
+            "level": logging.INFO,
+            "handlers": ["console"]}}
+
+    handlers = logconfig["root"]["handlers"]
+    if opts.debug:
+        logconfig["root"]["level"] = logging.DEBUG
+        logconfig["handlers"]["debug"] = {
+            "class": "logging.FileHandler",
+            "level": logging.DEBUG,
+            "mode": "w",
+            "filename": "./logimport.debug.log",
+            "formatter": "logfile"}
+        handlers.append("debug")
+    if opts.sqldebug:
+        logconfig["root"]["level"] = SQLobject.LOGLEVEL
+        logconfig["handlers"]["sqldebug"] = {
+            "class": "logging.FileHandler",
+            "level": SQLobject.LOGLEVEL,
+            "mode": "w",
+            "filename": "./logimport.sql.log",
+            "formatter": "logfile"}
+        handlers.append("sqldebug")
+
+    logging.config.dictConfig(logconfig)
+    sys.exit(logimport(database, url, opts.svn))
 
 
 if __name__ == "__main__":
-    database = sys.argv[1]
-    url = sys.argv[2]
-    if len(sys.argv) > 3:
-        svn = sys.argv[3]
-    else:
-        svn = "svn"
-    logging.basicConfig(level=logging.INFO, stream=sys.stderr)
-    logimport(database, url, svn)
+    main()

Modified: subversion/trunk/notes/directory-index/schema.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/schema.sql?rev=1301806&r1=1301805&r2=1301806&view=diff
==============================================================================
--- subversion/trunk/notes/directory-index/schema.sql (original)
+++ subversion/trunk/notes/directory-index/schema.sql Fri Mar 16 22:42:01 2012
@@ -16,6 +16,15 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
+
+---SCRIPT CREATE_SCHEMA
+
+DROP TABLE IF EXISTS dirindex;
+DROP TABLE IF EXISTS strindex;
+DROP TABLE IF EXISTS revision;
+
+-- Revision record
+
 CREATE TABLE revision (
   version integer NOT NULL PRIMARY KEY,
   created timestamp NOT NULL,
@@ -23,73 +32,152 @@ CREATE TABLE revision (
   log     varchar NULL
 );
 
-CREATE TABLE pathindex (
-  pathid  integer NOT NULL PRIMARY KEY,
-  abspath varchar NOT NULL UNIQUE
+-- String lookup table (stores the paths referenced by dirindex.pathid)
+
+CREATE TABLE strindex (
+  strid   integer NOT NULL PRIMARY KEY,
+  content varchar NOT NULL UNIQUE
 );
 
+-- Versioned directory tree
+
 CREATE TABLE dirindex (
   -- unique id of this node revision, used for
   -- predecessor/successor links
   rowid   integer NOT NULL PRIMARY KEY,
 
+  -- link to this node's immediate predecessor
+  origin  integer NULL REFERENCES dirindex(rowid),
+
   -- absolute (repository) path
-  pathid  integer NOT NULL REFERENCES pathindex(pathid),
+  pathid  integer NOT NULL REFERENCES strindex(strid),
 
   -- revision number
   version integer NOT NULL REFERENCES revision(version),
 
-  -- node deletion flag
-  deleted boolean NOT NULL,
-
-  -- node kind (0 = dir, 1 = file, etc.)
-  kind    integer NOT NULL,
-
-  -- predecessor link
-  origin  integer NULL REFERENCES dirindex(rowid),
+  -- node kind (D = dir, F = file, etc.)
+  kind    character(1) NOT NULL,
 
-  -- the predecessor is a copy source
-  copied  boolean NOT NULL,
+  -- the operation that produced this entry:
+  -- A = add, R = replace, M = modify, D = delete, N = rename
+  opcode  character(1) NOT NULL,
 
   -- the index entry is the result of an implicit subtree operation
   subtree boolean NOT NULL
 );
 CREATE UNIQUE INDEX dirindex_versioned_tree ON dirindex(pathid, version DESC);
 CREATE INDEX dirindex_successor_list ON dirindex(origin);
-CREATE INDEX dirindex_deleted ON dirindex(deleted);
+CREATE INDEX dirindex_operation ON dirindex(opcode);
+
+-- Initial records for the repository root (revision 0, path '/')
 
--- repository root
 INSERT INTO revision (version, created, author, log)
   VALUES (0, 'EPOCH', NULL, NULL);
-INSERT INTO pathindex (pathid, abspath) VALUES (0, '/');
-INSERT INTO dirindex (rowid, pathid, version, deleted,
-                      kind, origin, copied, subtree)
-  VALUES (0, 0, 0, 0, 0, NULL, 0, 0);
-
-
--- lookup PATH@REVISION
-
-SELECT
-  dirindex.*, pathindex.abspath
-FROM dirindex JOIN pathindex
-  ON dirindex.pathid = pathindex.pathid
-WHERE
-  pathindex.abspath = '' -- $PATH
-  AND dirindex.version <= 0 -- $REVISION
-ORDER BY pathindex.abspath ASC, dirindex.version DESC
-LIMIT 1;  -- then check dirindex.deleted
-
--- single-revision tree for REVISION
-
-SELECT
-  dirindex.*, pathindex.abspath
-FROM dirindex JOIN pathindex
-    ON dirindex.pathid = pathindex.pathid
+INSERT INTO strindex (strid, content) VALUES (0, '/');
+INSERT INTO dirindex (rowid, origin, pathid, version, kind, opcode, subtree)
+  VALUES (0, NULL, 0, 0, 'D', 'A', 0);
+
+
+---STATEMENT INSERT_REVISION_RECORD
+
+INSERT INTO revision (version, created, author, log)
+  VALUES (?, ?, ?, ?);
+
+---STATEMENT GET_REVENT_BY_VERSION
+
+SELECT * FROM revision WHERE version = ?;
+
+---STATEMENT INSERT_STRINDEX_RECORD
+
+INSERT INTO strindex (content) VALUES (?);
+
+---STATEMENT GET_STRENT_BY_STRID
+
+SELECT * FROM strindex WHERE strid = ?;
+
+---STATEMENT GET_STRENT_BY_CONTENT
+
+SELECT * FROM strindex WHERE content = ?;
+
+---STATEMENT INSERT_DIRINDEX_RECORD
+
+INSERT INTO dirindex (origin, pathid, version, kind, opcode, subtree)
+  VALUES (?, ?, ?, ?, ?, ?);
+
+---STATEMENT GET_DIRENT_BY_ROWID
+
+SELECT dirindex.*, strindex.content FROM dirindex
+  JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE dirindex.rowid = ?;
+
+---STATEMENT GET_DIRENT_BY_ABSPATH_AND_VERSION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+  JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE abspath = ? AND dirindex.version = ?;
+
+---STATEMENT LOOKUP_ABSPATH_AT_REVISION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+  JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE abspath = ? AND dirindex.version <= ?
+ORDER BY abspath ASC, dirindex.version DESC
+LIMIT 1;
+
+---STATEMENT LIST_SUBTREE_AT_REVISION
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+  JOIN strindex ON dirindex.pathid = strindex.strid
   JOIN (SELECT pathid, MAX(version) AS maxver FROM dirindex
-        WHERE version <= 0 -- $REVISION
-        GROUP BY pathid)
-      AS filtered
+        WHERE version <= ? GROUP BY pathid)
+    AS filtered
     ON dirindex.pathid == filtered.pathid
-       AND dirindex.version == filtered.maxver
-WHERE NOT dirindex.deleted
-ORDER BY pathindex.abspath ASC;
+        AND dirindex.version == filtered.maxver
+WHERE abspath LIKE ? ESCAPE '#'
+      AND dirindex.opcode <> 'D'
+ORDER BY abspath ASC;
+
+---STATEMENT LIST_DIRENT_SUCCESSORS
+
+SELECT dirindex.*, strindex.content AS abspath FROM dirindex
+  JOIN strindex ON dirindex.pathid = strindex.strid
+WHERE dirindex.origin = ?
+ORDER BY abspath ASC, dirindex.version ASC;
+
+
+-- Temporary table used to build a tree transaction
+
+---SCRIPT CREATE_TRANSACTION_CONTEXT
+
+CREATE TEMPORARY TABLE txncontext (
+  origin  integer NULL,
+  abspath varchar NOT NULL UNIQUE,
+  kind    character(1) NOT NULL,
+  opcode  character(1) NOT NULL,
+  subtree boolean NOT NULL
+);
+
+---SCRIPT REMOVE_TRANSACTION_CONTEXT
+
+DROP TABLE IF EXISTS temp.txncontext;
+
+---STATEMENT INSERT_TRANSACTION_RECORD
+
+INSERT INTO temp.txncontext (origin, abspath, kind, opcode, subtree)
+  VALUES (?, ?, ?, ?, ?);
+
+---STATEMENT GET_TRANSACTION_RECORD
+
+SELECT * FROM temp.txncontext WHERE abspath = ?;
+
+---STATEMENT REMOVE_TRANSACTION_RECORD
+
+DELETE FROM temp.txncontext WHERE abspath = ?;
+
+---STATEMENT REMOVE_TRANSACTION_SUBTREE
+
+DELETE FROM temp.txncontext WHERE abspath LIKE ? ESCAPE '#';
+
+---STATEMENT LIST_TRANSACTION_RECORDS
+
+SELECT * FROM temp.txncontext ORDER BY abspath ASC;