You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2012/03/10 23:08:39 UTC

svn commit: r1299280 - in /subversion/trunk/notes/directory-index: ./ dirindex.py schema.sql

Author: brane
Date: Sat Mar 10 22:08:38 2012
New Revision: 1299280

URL: http://svn.apache.org/viewvc?rev=1299280&view=rev
Log:
Proof-of-concept implementation of a single-index representation
for directories in the repository.

* notes/directory-index: New.
* notes/directory-index/schema.sql: New. Index schema and example queries.
* notes/directory-index/dirindex.py: New. Reference implementation.

Added:
    subversion/trunk/notes/directory-index/
    subversion/trunk/notes/directory-index/dirindex.py
    subversion/trunk/notes/directory-index/schema.sql

Added: subversion/trunk/notes/directory-index/dirindex.py
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/dirindex.py?rev=1299280&view=auto
==============================================================================
--- subversion/trunk/notes/directory-index/dirindex.py (added)
+++ subversion/trunk/notes/directory-index/dirindex.py Sat Mar 10 22:08:38 2012
@@ -0,0 +1,406 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import collections
+import datetime
+import logging
+import sqlite3
+
+
+class Error(Exception):
+    pass
+
+
+class _SQLObjectMixin(object):
+    @classmethod
+    def _columns(cls):
+        return ", ".join(cls._fields)
+
+    @classmethod
+    def _qualified_columns(cls):
+        return ", ".join("%s.%s" % (cls._table, f) for f in cls._fields)
+
+    @classmethod
+    def _vars(cls):
+        return ", ".join("?" * len(cls._fields))
+
+    @classmethod
+    def _columns_nokey(cls):
+        return ", ".join(cls._fields[1:])
+
+    @classmethod
+    def _vars_nokey(cls):
+        return ", ".join("?" * (len(cls._fields) - 1))
+
+    def _nokey(self):
+        return self[1:]
+
+
+class Revent(
+        collections.namedtuple("ReventBase", [
+            "version", "created", "author", "log"]),
+        _SQLObjectMixin):
+    _table = "revision"
+
+
+class Dirent(
+        collections.namedtuple("DirentBase", [
+            "rowid", "abspath", "version", "deleted",
+            "kind", "origin", "copied", "subtree"]),
+        _SQLObjectMixin):
+    _table = "dirindex"
+
+    def __str__(self):
+        return "%3d %c%c %s" % (
+            self.version,
+            self.deleted and "x" or " ",
+            self.kind and "f" or "d",
+            self.abspath)
+
+
+class Index(object):
+    def __init__(self, database):
+        self.conn = sqlite3.connect(database, isolation_level = "IMMEDIATE")
+        self.cursor = self.conn.cursor()
+        self.cursor.execute("PRAGMA foreign_keys = ON")
+
+    def execute(self, statement, parameters=None):
+        if parameters is not None:
+            fmt = statement.replace("%", "%%").replace("?", "%r")
+            logging.debug("EXECUTE\n" + fmt, *parameters)
+            return self.cursor.execute(statement, parameters)
+        else:
+            logging.debug("EXECUTE\n%s", statement)
+            return self.cursor.execute(statement)
+
+    __initialize = (
+        """\
+DROP TABLE IF EXISTS dirindex""",
+        """\
+DROP TABLE IF EXISTS revision""",
+        """\
+CREATE TABLE revision (
+  version integer NOT NULL PRIMARY KEY,
+  created timestamp NOT NULL,
+  author  varchar NULL,
+  log     varchar NULL
+)""",
+        """\
+CREATE TABLE dirindex (
+  rowid   integer PRIMARY KEY,
+  abspath varchar NOT NULL,
+  version integer NOT NULL REFERENCES revision(version),
+  deleted boolean NOT NULL,
+  kind    integer NOT NULL,
+  origin  integer NULL REFERENCES dirindex(rowid),
+  copied  boolean NOT NULL,
+  subtree boolean NOT NULL
+)""",
+        """\
+CREATE UNIQUE INDEX dirindex_versioned_tree
+  ON dirindex(abspath ASC, version DESC)""",
+        """\
+CREATE INDEX dirindex_successor_list
+  ON dirindex(origin)""",
+        """\
+INSERT INTO revision (version, created, author, log)
+  VALUES (0, 'EPOCH', NULL, NULL)""",
+        """\
+INSERT INTO dirindex (abspath, version, deleted, kind, origin, copied, subtree)
+  VALUES ('/', 0, 0, 0, NULL, 0, 0)""")
+    def initialize(self):
+        try:
+            for statement in self.__initialize:
+                self.execute(statement)
+            self.commit()
+        finally:
+            self.rollback()
+
+    def commit(self):
+        logging.debug("COMMIT")
+        return self.conn.commit()
+
+    def rollback(self):
+        logging.debug("ROLLBACK")
+        return self.conn.rollback()
+
+    def close(self):
+        self.rollback()
+        logging.debug("CLOSE")
+        return self.conn.close()
+
+    __get_revision = """\
+SELECT %s FROM revision
+WHERE version = ?""" % Revent._qualified_columns()
+    def get_revision(self, version):
+        self.execute(self.__get_revision, [version])
+        row = self.cursor.fetchone()
+        if row is not None:
+            return Revent._make(row)
+        return None
+
+    __new_revision = """\
+INSERT INTO revision (%s)
+  VALUES (%s)""" % (Revent._columns(), Revent._vars())
+    def new_revision(self, version, created=None, author=None, log=None):
+        if created is None:
+            created = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+        revent = Revent(version = version,
+                        created = created,
+                        author = author,
+                        log = log)
+        self.execute(self.__new_revision, revent)
+        return revent
+
+    __insert = """\
+INSERT INTO dirindex (%s)
+  VALUES (%s)""" % (
+    Dirent._columns_nokey(), Dirent._vars_nokey())
+    def insert(self, dirent):
+        assert isinstance(dirent, Dirent)
+        self.execute(self.__insert, dirent._nokey())
+        return Dirent(self.cursor.lastrowid, *dirent._nokey())
+
+    __lookup = """\
+SELECT %s FROM dirindex
+WHERE abspath = ? AND version <= ?
+ORDER BY abspath ASC, version DESC
+LIMIT 1""" % Dirent._qualified_columns()
+    def lookup(self, abspath, version):
+        self.execute(self.__lookup, [abspath, version])
+        row = self.cursor.fetchone()
+        if row is not None:
+            dirent = Dirent._make(row)
+            if not dirent.deleted:
+                return dirent
+        return None
+
+    __subtree = """\
+SELECT %s FROM dirindex
+  JOIN (SELECT abspath, MAX(version) AS maxver FROM dirindex
+        WHERE version <= ? GROUP BY abspath) AS filtered
+  ON dirindex.abspath == filtered.abspath
+     AND dirindex.version == filtered.maxver
+WHERE dirindex.abspath LIKE ? ESCAPE '#'
+      AND NOT dirindex.deleted
+ORDER BY dirindex.abspath ASC""" % Dirent._qualified_columns()
+    def subtree(self, abspath, version):
+        pattern = (abspath.rstrip("/")
+                   .replace("#", "##")
+                   .replace("%", "#%")
+                   .replace("_", "#_")) + "/%"
+        self.execute(self.__subtree, [version, pattern])
+        for row in self.cursor:
+            yield Dirent._make(row)
+
+    __predecessor = """\
+SELECT %s FROM dirindex
+WHERE rowid = ?"""  % Dirent._qualified_columns()
+    def predecessor(self, dirent):
+        assert isinstance(dirent, Dirent)
+        if dirent.origin is None:
+            return None
+        self.execute(self.__predecessor, [dirent.origin])
+        return Dirent._make(self.cursor.fetchone())
+
+    __successors = """\
+SELECT %s FROM dirindex
+where origin = ?
+ORDER BY abspath ASC, version ASC""" % Dirent._qualified_columns()
+    def successors(self, dirent):
+        assert isinstance(dirent, Dirent)
+        self.execute(self.__successors, [dirent.rowid])
+        for row in self.cursor:
+            yield Dirent._make(row)
+
+
+class Revision(object):
+    def __init__(self, index, version,
+                 created=None, author=None, log=None):
+        self.index = index
+        self.version = version
+        self.revent = index.get_revision(version)
+        self.__created = created
+        self.__author = author
+        self.__log = log
+        self.__context = None
+        index.rollback()
+
+    def __enter__(self):
+        if self.revent is not None:
+            raise Error("revision is read-only")
+        logging.debug("BEGIN")
+        self.revent = self.index.new_revision(
+            self.version, self.__created, self.__author, self.__log)
+        self.__context = {}
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        try:
+            if exc_type is None and len(self.__context):
+                for dirent in sorted(self.__context.itervalues()):
+                    self.index.insert(dirent)
+                self.index.commit()
+        except:
+            self.index.rollback()
+            raise
+        finally:
+            self.__context = None
+
+    def __check_writable(self, action):
+        if self.__context is None:
+            raise Error(action + " requires a transaction")
+
+    def __check_not_root(self, abspath, action):
+        if abspath.rstrip("/") == "":
+            raise Error(action + " not allowed on /")
+
+    def __find_target(self, abspath, action):
+        target = self.index.lookup(abspath, self.version - 1)
+        if target is None:
+            raise Error(action + " target does not exist: " + abspath)
+        return target
+
+    def lookup(self, abspath):
+        try:
+            return self.index.lookup(abspath, self.version)
+        finally:
+            if self.__context is None:
+                self.index.rollback()
+
+    def __add(self, action, abspath, kind, frompath, fromver):
+        origin = None
+        if frompath is not None:
+            origin = self.index.lookup(frompath, fromver)
+            if origin is None:
+                raise Error(action + " source does not exist: " + frompath)
+            if origin.kind != kind:
+                raise Error(action + " changes the source object kind")
+            origin = origin.rowid
+        dirent = Dirent(rowid = None,
+                        abspath = abspath,
+                        version = self.version,
+                        deleted = 0,
+                        kind = kind,
+                        origin = origin,
+                        copied = int(origin is not None),
+                        subtree = 0)
+        self.__context[dirent.abspath] = dirent
+        if frompath is not None:
+            offset = len(frompath.rstrip("/"))
+            prefix = abspath.rstrip("/")
+            for source in self.index.subtree(frompath, fromver):
+                dirent = Dirent(rowid = None,
+                                abspath = prefix + source.abspath[offset:],
+                                version = self.version,
+                                deleted = 0,
+                                kind = source.kind,
+                                origin = source.rowid,
+                                copied = 1,
+                                subtree = 1)
+                self.__context[dirent.abspath] = dirent
+
+    def add(self, abspath, kind, frompath=None, fromver=None):
+        self.__check_writable("add")
+        self.__check_not_root(abspath, "add")
+        return self.__add("add", abspath, kind, frompath, fromver)
+
+    def replace(self, abspath, kind, frompath=None, fromver=None):
+        self.__check_writable("replace")
+        self.__check_not_root(abspath, "replace")
+        self.__find_target(abspath, "replace")
+        return self.__add("replace", abspath, kind, frompath, fromver)
+
+    def modify(self, abspath):
+        self.__check_writable("modify")
+        target = self.__find_target(abspath, "modify")
+        dirent = Dirent(rowid = None,
+                        abspath = abspath,
+                        version = self.version,
+                        deleted = 0,
+                        kind = target.kind,
+                        origin = target.rowid,
+                        copied = 0,
+                        subtree = 0)
+        self.__context[dirent.abspath] = dirent
+
+    def delete(self, abspath):
+        self.__check_writable("delete")
+        self.__check_not_root(abspath, "delete")
+        target = self.__find_target(abspath, "delete")
+        dirent = Dirent(rowid = None,
+                        abspath = abspath,
+                        version = self.version,
+                        deleted = 1,
+                        kind = target.kind,
+                        origin = target.rowid,
+                        copied = 0,
+                        subtree = 0)
+        self.__context[dirent.abspath] = dirent
+        for source in self.index.subtree(abspath, self.version - 1):
+            dirent = Dirent(rowid = None,
+                            abspath = source.abspath,
+                            version = self.version,
+                            deleted = 1,
+                            kind = source.kind,
+                            origin = source.rowid,
+                            copied = 0,
+                            subtree = 1)
+            self.__context[dirent.abspath] = dirent
+
+
+def simpletest(database):
+    ix = Index(database)
+    ix.initialize()
+    with Revision(ix, 1) as rev:
+        rev.add(u'/A', 0)
+        rev.add(u'/A/B', 0)
+        rev.add(u'/A/B/c', 1)
+    with Revision(ix, 2) as rev:
+        rev.add(u'/A/B/d', 1)
+    with Revision(ix, 3) as rev:
+        rev.add(u'/X', 0, u'/A', 1)
+        rev.add(u'/X/B/d', 1, u'/A/B/d', 2)
+    with Revision(ix, 4) as rev:
+        rev.delete(u'/X/B/d')
+        rev.add(u'/X/B/x', 1, u'/X/B/d', 3)
+    with Revision(ix, 5) as rev:
+        rev.delete(u'/A')
+
+    for r in (0, 1, 2, 3, 4, 5):
+        print "Revision: %d" % r
+        for dirent in list(ix.subtree('/', r)):
+            origin = ix.predecessor(dirent)
+            if origin is None:
+                print "   " + str(dirent)
+            else:
+                print "   %-17s  <- %s" % (dirent, origin)
+
+    dirent = ix.lookup('/A/B/c', 4)
+    print "/A/B/c@4 -> %s@%d" % (dirent.abspath, dirent.version)
+    for succ in ix.successors(dirent):
+        print "%11s %s %s@%d" % (
+            "", succ.deleted and "x_x" or "-->",
+            succ.abspath, succ.version)
+
+    ix.close()
+
+def loggedsimpletest(database):
+    import sys
+    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
+    simpletest(database)

Added: subversion/trunk/notes/directory-index/schema.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/directory-index/schema.sql?rev=1299280&view=auto
==============================================================================
--- subversion/trunk/notes/directory-index/schema.sql (added)
+++ subversion/trunk/notes/directory-index/schema.sql Sat Mar 10 22:08:38 2012
@@ -0,0 +1,81 @@
+-- -*- mode: sql; sql-product: sqlite; coding: utf-8 -*-
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+CREATE TABLE revision (
+  version integer NOT NULL PRIMARY KEY,
+  created timestamp NOT NULL,
+  author  varchar NULL,
+  log     varchar NULL
+);
+
+CREATE TABLE dirindex (
+  -- unique id of this node revision, used for
+  -- predecessor/successor links
+  rowid   integer PRIMARY KEY,
+
+  -- absolute (repository) path
+  abspath varchar NOT NULL,
+
+  -- revision number
+  version integer NOT NULL REFERENCES revision(version),
+
+  -- node deletion flag
+  deleted boolean NOT NULL,
+
+  -- node kind (0 = dir, 1 = file, etc.)
+  kind    integer NOT NULL,
+
+  -- predecessor link
+  origin  integer NULL REFERENCES dirindex(rowid),
+
+  -- the predecessor is a copy source
+  copied  boolean NOT NULL,
+
+  -- the index entry is the result of an implicit subtree operation
+  subtree boolean NOT NULL
+);
+CREATE UNIQUE INDEX dirindex_versioned_tree ON dirindex(abspath ASC, version DESC);
+CREATE INDEX dirindex_successor_list ON dirindex(origin);
+
+-- repository root
+INSERT INTO revision (version, created, author, log)
+VALUES (0, 'EPOCH', NULL, NULL);
+INSERT INTO dirindex (abspath, version, deleted, kind, origin, copied, implied)
+VALUES ('/', 0, 0, 0, NULL, 0, 0);
+
+
+-- lookup PATH@REVISION
+
+SELECT * FROM dirindex
+WHERE
+  abspath = '' -- $PATH
+  AND version <= 0 -- $REVISION
+ORDER BY abspath ASC, version DESC
+LIMIT 1;  -- then check .deleted
+
+-- single-revision tree for REVISION
+
+SELECT dirindex.* FROM dirindex
+  JOIN (SELECT abspath, MAX(version) AS maxver FROM dirindex
+        WHERE version <= 0 -- $REVISION
+        GROUP BY abspath)
+      AS filtered
+    ON dirindex.abspath == filtered.abspath
+       AND dirindex.version == filtered.maxver
+WHERE NOT dirindex.deleted
+ORDER BY dirindex.abspath ASC;