You are viewing a plain text version of this content. The canonical link for it is here.
Posted to general@db.apache.org by rh...@apache.org on 2012/12/19 19:20:28 UTC
svn commit: r843115 [26/44] - in /websites/production/db/content/derby: ./
binaries/ blogs/ blogs/images/ dev/ docs/ images/ integrate/
integrate/plugin_help/ integrate/plugin_help/images/ logo/ manuals/ papers/
papers/DerbyTut/ releases/ skin/ skin/cs...
Added: websites/production/db/content/derby/papers/pageformats.html
==============================================================================
--- websites/production/db/content/derby/papers/pageformats.html (added)
+++ websites/production/db/content/derby/papers/pageformats.html Wed Dec 19 18:20:21 2012
@@ -0,0 +1,1440 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Derby On Disk Page Format</title>
+<link type="text/css" href="../skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="../skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="../skin/print.css" rel="stylesheet">
+<link type="text/css" href="../skin/profile.css" rel="stylesheet">
+<script src="../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="../">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+ |breadtrail
+ +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">apache</a> > <a href="http://db.apache.org/">db</a><script src="../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+ |header
+ +-->
+<div class="header">
+<!--+
+ |start group logo
+ +-->
+<div class="grouplogo">
+<a href="http://db.apache.org/derby"><img class="logoImage" alt="Apache Derby" src="../images/derby-logo-web.png" title="Derby is a zero-admin Java RDBMS"></a>
+</div>
+<!--+
+ |end group logo
+ +-->
+<!--+
+ |start Project Logo
+ +-->
+<div class="projectlogoA1">
+<a href="http://db.apache.org"><img class="logoImage" alt="Apache DB Project" src="../images/db-logo-white.png" title="Apache DB creates and maintains database solutions."></a>
+</div>
+<!--+
+ |end Project Logo
+ +-->
+<!--+
+ |start Tabs
+ +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="../index.html">Home</a>
+</li>
+<li>
+<a class="unselected" href="../quick_start.html">Quick Start</a>
+</li>
+<li>
+<a class="unselected" href="../derby_downloads.html">Download</a>
+</li>
+<li>
+<a class="unselected" href="../derby_comm.html">Community</a>
+</li>
+<li>
+<a class="unselected" href="../manuals/index.html">Documentation</a>
+</li>
+<li class="current">
+<a class="selected" href="../blogs/index.html">Resources</a>
+</li>
+</ul>
+<!--+
+ |end Tabs
+ +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+ |start Subtabs
+ +-->
+<div id="level2tabs"></div>
+<!--+
+ |end Endtabs
+ +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+// --></script>
+</div>
+<!--+
+ |breadtrail
+ +-->
+<div class="breadtrail">
+
+
+ </div>
+<!--+
+ |start Menu, mainarea
+ +-->
+<!--+
+ |start Menu
+ +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', '../skin/')" id="menu_1.1Title" class="menutitle">Blogs and Articles About Derby</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#blogs">Blogs</a>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3', '../skin/')" id="menu_1.1.3Title" class="menutitle">Articles</div>
+<div id="menu_1.1.3" class="menuitemgroup">
+<div onclick="SwitchMenu('menu_1.1.3.1', '../skin/')" id="menu_1.1.3.1Title" class="menutitle">Tutorials, Tips and Tuning</div>
+<div id="menu_1.1.3.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#getstarted">Getting Started</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#features">Features, Hints and Tips</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#security">Security</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#performance">Performance and Tuning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.2', '../skin/')" id="menu_1.1.3.2Title" class="menutitle">Tools and Migration</div>
+<div id="menu_1.1.3.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#migration">Migration</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.3', '../skin/')" id="menu_1.1.3.3Title" class="menutitle">Applications</div>
+<div id="menu_1.1.3.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#client">Client</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#middletier">Middle Tier</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#persistence">Persistence</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#scalability">Scalability and Failover</a>
+</div>
+</div>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.2', '../skin/')" id="menu_1.2Title" class="menutitle">Integration With Other Products</div>
+<div id="menu_1.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#uses">What works with Derby?</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#products">Product Writeups</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', '../skin/')" id="menu_1.3Title" class="menutitle">Eclipse Plug-ins</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/derby_plugin_info.html">Info</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4', '../skin/')" id="menu_selected_1.4Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Papers and Presentations</div>
+<div id="menu_selected_1.4" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="../papers/index.html">Overview</a>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4.2', '../skin/')" id="menu_selected_1.4.2Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Derby Engine</div>
+<div id="menu_selected_1.4.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_1.4.2.1', '../skin/')" id="menu_1.4.2.1Title" class="menutitle">Javadoc</div>
+<div id="menu_1.4.2.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine">Engine</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/language">Language</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/publishedapi">API</a>
+</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_arch.html">Architecture</a>
+</div>
+<div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Disk Page Format</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_htw.html">How Things Work</a>
+</div>
+<div class="menuitem">
+<a href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
+<a href="../papers/JDBCImplementation.html">JDBC</a>
+</div>
+<div class="menuitem">
+<a href="../papers/logformats.html">Log Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/recovery.html">Logging &amp; Recovery</a>
+</div>
+<div class="menuitem">
+<a href="../papers/optimizer.html">Optimizer</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/types/package-summary.html#package_description">Type System</a>
+</div>
+<div class="menuitem">
+<a href="../papers/versionupgrade.html">Versioning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.3', '../skin/')" id="menu_1.4.3Title" class="menutitle">Derby Network Client</div>
+<div id="menu_1.4.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyClientSpec.html">Functional Spec</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.4', '../skin/')" id="menu_1.4.4Title" class="menutitle">Derby Tutorial</div>
+<div id="menu_1.4.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyTut/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/install_software.html">Step 1: Install Software</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ij_intro.html">Step 2: ij Basics</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/embedded_intro.html">Step 3: Embedded Derby</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ns_intro.html">Step 4: Derby Network Server</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.5', '../skin/')" id="menu_1.4.5Title" class="menutitle">Presentations</div>
+<div id="menu_1.4.5" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/ApacheCon.html">ApacheCon</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Victorian+Java+User+Group">Victorian JUG 2008</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#OSCON+2005">OSCON 2005</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Colorado+Software+Summit+2004">Colorado 2004</a>
+</div>
+</div>
+</div>
+<!--+
+ |start Search
+ +-->
+<div class="searchbox">
+<hr>
+<form action="http://www.google.com/search" method="get">
+<input value="db.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="18" name="q" id="query" type="text" value="Search the site with google">
+ <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+ |end search
+ +-->
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+ |alternative credits
+ +-->
+<div id="credit2"></div>
+</div>
+<!--+
+ |end Menu
+ +-->
+<!--+
+ |start content
+ +-->
+<div id="content">
+<div class="trail">Font size:
+ <input value="Reset" class="resetfont" title="Reset text" onclick="ndeSetTextSize('reset'); return false;" type="button">
+ <input value="-a" class="smallerfont" title="Shrink text" onclick="ndeSetTextSize('decr'); return false;" type="button">
+ <input value="+a" class="biggerfont" title="Enlarge text" onclick="ndeSetTextSize('incr'); return false;" type="button">
+</div>
+<h1>Derby On Disk Page Format</h1>
+<div class="abstract">This document describes the storage format of Derby disk pages.
+
+ This is a work-in-progress derived from Javadoc comments and
+
+ from explanations Mike Matrigali posted to the Derby lists.
+
+ Please post questions, comments, and corrections to
+
+ derby-dev@db.apache.org.
+
+ </div>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#introduction"> Introduction </a>
+</li>
+<li>
+<a href="#storedpage">Data Page Format</a>
+<ul class="minitoc">
+<li>
+<a href="#formatid">Format Id </a>
+</li>
+<li>
+<a href="#pageheader"> Page Header </a>
+</li>
+<li>
+<a href="#records"> Records </a>
+</li>
+<li>
+<a href="#slottable">Slot Offset Table</a>
+</li>
+<li>
+<a href="#checksum">Checksum</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#allocpage">Allocation Page</a>
+<ul class="minitoc">
+<li>
+<a href="#Alloc+Page+detailed+implementation+notes">
+
+ Alloc Page detailed implementation notes</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Allocation+Extent">Allocation Extent</a>
+</li>
+</ul>
+</div>
+
+<a name="N10010"></a><a name="introduction"></a>
+<h2 class="boxed"> Introduction </h2>
+<div class="section">
+<p>Derby stores table and index data in Containers, which currently map
+
+ to files in the <span class="codefrag">seg0</span>
+
+ directory of the database. In the current Derby implementation there is a 1 to 1 mapping of
+
+ containers to files. Two containers never map to a single file and 1
+
+ container never maps to multiple files.</p>
+<p>
+
+ Data is stored in pages within the container.</p>
+<p>A page contains a set of records, which can be accessed by "slot", which
+
+ defines the order of the records on the page, or by "id" which defines
+
+ the identity of the records on the page. Clients access records by both
+
+ slot and id, depending on their needs.</p>
+<p>A Table or a BTree index provides a row-based access mechanism (row-based
+
+ access interface is known as conglomerate). Rows are mapped to records
+
+ in data pages; in case of a table, a single row can span multiple records in
+
+ multiple pages.</p>
+<p>A container can have three types of pages:</p>
+<ul>
+
+<li>Header Page - which is just a specialized version of the Alloc Page.</li>
+
+<li>Data Pages which hold data, and</li>
+
+<li>Alloc Pages which hold page allocation information. An Alloc page is a specialized version of the Data page.</li>
+
+</ul>
+<p>The container can be visualised as:<br>
+<img alt="" src="container-format.png"></p>
+<p>
+
+Header Page is currently always page 0 of the container. It
+
+contains information that raw store needs to maintain about the
+
+container once per container, and is currently implemented as an Alloc
+
+Page which "borrows" space from the alloc page for its information.
+
+The original decision was that the designers did not want to waste a whole page for
+
+header information, so a part of the page was used and the first allocation
+
+map was put on the second half of it. See <span class="codefrag">AllocPage.java</span> for info about layout and
+
+borrowing.
+
+</p>
+<p>
+
+<a href="#allocpage"> Allocation Page</a> - After page 0, all subsequent Allocation pages only
+
+have allocation bit maps.
+
+</p>
+</div>
+
+<a name="N10049"></a><a name="storedpage"></a>
+<h2 class="boxed">Data Page Format</h2>
+<div class="section">
+<p>A data page is broken into five sections.
+
+ <img alt="" src="page-format.png"></p>
+<a name="N10055"></a><a name="formatid"></a>
+<h3 class="boxed">Format Id </h3>
+<p> The formatId is a 4-byte array; it contains the format Id of this
+
+ page. The possible values are RAW_STORE_STORED_PAGE or RAW_STORE_ALLOC_PAGE.</p>
+<a name="N1005F"></a><a name="pageheader"></a>
+<h3 class="boxed"> Page Header </h3>
+<p> The page header is a fixed size, 56 bytes. </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr>
+
+<th colspan="1" rowspan="1">Size</th>
+ <th colspan="1" rowspan="1">Type</th>
+ <th colspan="1" rowspan="1">Description</th>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">1 byte</td>
+ <td colspan="1" rowspan="1">boolean</td>
+ <td colspan="1" rowspan="1">is page an overflow page</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">1 byte</td>
+ <td colspan="1" rowspan="1">byte</td>
+ <td colspan="1" rowspan="1">
+
+<p>page status is either VALID_PAGE or INVALID_PAGE (a field
+
+ maintained in base page)</p>
+
+<p>page goes through the following transitions:
+
+ <br>
+
+ VALID_PAGE &lt;-&gt; deallocated page -&gt; free page &lt;-&gt;
+
+ VALID_PAGE</p>
+
+<p>deallocated and free page are both INVALID_PAGE as far as BasePage
+
+ is concerned.
+
+ <br>
+
+ When a page is deallocated, it transitions from VALID_PAGE

+ to INVALID_PAGE.

+ <br>

+ When a page is allocated, it transitions from INVALID_PAGE
+
+ to VALID_PAGE.</p>
+
+</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">8 bytes</td>
+ <td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">pageVersion (a field maintained in base page)</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes</td>
+ <td colspan="1" rowspan="1">unsigned short</td>
+ <td colspan="1" rowspan="1">number of slots in slot offset table</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">4 bytes</td>
+ <td colspan="1" rowspan="1">integer</td>
+ <td colspan="1" rowspan="1">next record identifier</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">4 bytes</td>
+ <td colspan="1" rowspan="1">integer</td>
+ <td colspan="1" rowspan="1">generation number of this page (Future Use)</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">4 bytes</td>
+ <td colspan="1" rowspan="1">integer</td>
+ <td colspan="1" rowspan="1">previous generation of this page (Future Use)</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">8 bytes</td>
+ <td colspan="1" rowspan="1">bipLocation</td>
+ <td colspan="1" rowspan="1">the location of the beforeimage page (Future Use)</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes</td>
+ <td colspan="1" rowspan="1">unsigned short</td>
+ <td colspan="1" rowspan="1">number of deleted rows on page. (new release 2.0)</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes</td>
+ <td colspan="1" rowspan="1">short</td>
+ <td colspan="1" rowspan="1">spare for future use</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">4 bytes</td>
+ <td colspan="1" rowspan="1">integer</td>
+ <td colspan="1" rowspan="1">spare for future use (encryption uses to write random bytes
+
+ here).</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">8 bytes</td>
+ <td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">spare for future use</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">8 bytes</td>
+ <td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">spare for future use</td>
+
+</tr>
+
+</table>
+<div class="note">
+<div class="label">Note</div>
+<div class="content">Spare space is guaranteed to be written with "0", so that future

+ use of a field should either not use "0" as a valid data item or
+
+ pick 0 as a valid default value so that on the fly upgrade can assume
+
+ that 0 means field was never assigned. </div>
+</div>
+<a name="N1017A"></a><a name="records"></a>
+<h3 class="boxed"> Records </h3>
+<p>The records section contains zero or more records. Each record starts
+
+ with a Record Header</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<caption>Record Header</caption>
+
+<tr>
+
+<th colspan="1" rowspan="1">Type</th>
+ <th colspan="1" rowspan="1">Description</th>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">1 byte</td>
+ <td colspan="1" rowspan="1">
+
+<p>Status bits for the record header</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr>
+
+<td colspan="1" rowspan="1">RECORD_DELETED</td>
+ <td colspan="1" rowspan="1">used to indicate the record has been deleted</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">RECORD_OVERFLOW</td>
+ <td colspan="1" rowspan="1">used to indicate the record has been overflowed, it will
+
+ point to the overflow page and ID</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">RECORD_HAS_FIRST_FIELD</td>
+ <td colspan="1" rowspan="1">used to indicate that firstField is stored.
+
+ When RECORD_OVERFLOW and RECORD_HAS_FIRST_FIELD both are
+
+ set, part of record is on the page, the record header also
+
+ stores the overflow pointer to the next part of the record.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">RECORD_VALID_MASK</td>
+ <td colspan="1" rowspan="1">A mask of valid bits that can be set currently, such that
+
+ the following assert can be made: </td>
+
+</tr>
+
+</table>
+
+</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">compressed int</td>
+ <td colspan="1" rowspan="1">record identifier</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">compressed long</td>
+ <td colspan="1" rowspan="1">overflow page only if RECORD_OVERFLOW is set</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">compressed int</td>
+ <td colspan="1" rowspan="1">overflow id only if RECORD_OVERFLOW is set</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">compressed int</td>
+ <td colspan="1" rowspan="1">first field only if RECORD_HAS_FIRST_FIELD is set - otherwise
+
+ 0</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">compressed int</td>
+ <td colspan="1" rowspan="1">number of fields in this portion - only if RECORD_OVERFLOW is
+
+ false OR RECORD_HAS_FIRST_FIELD is true - otherwise 0</td>
+
+</tr>
+
+</table>
+<div class="note">
+<div class="label">Long Rows</div>
+<div class="content"> A row is long if all of its columns can't fit on a single page.
+
+ When storing a long row, the segment of the row which fits on the
+
+ page is left there, and a pointer column is added at the end of the
+
+ row. It points to another row in the same container on a different
+
+ page. That row will contain the next set of columns and a continuation
+
+ pointer if necessary. The overflow portion will be on an "overflow"
+
+ page, and that page may have overflow portions of other rows on it
+
+ (unlike overflow columns). </div>
+</div>
+<p>The Record Header is followed by one or more fields. Each field contains
+
+ a Field Header and optional Field Data.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<caption>Stored Field Header Format</caption>
+
+<tr>
+
+<td colspan="1" rowspan="1">status</td>
+ <td colspan="1" rowspan="1">
+
+<p> The status is 1 byte, it indicates the state of the field.
+
+ A FieldHeader can be in the following states: </p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr>
+
+<td colspan="1" rowspan="1">NULL</td>
+ <td colspan="1" rowspan="1">if the field is NULL, no field data length is stored</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">OVERFLOW</td>
+ <td colspan="1" rowspan="1">indicates the field has been overflowed to another page.
+
+ overflow page and overflow ID is stored at the end of
+
+ the user data. field data length must be a number greater than
+
+ or equal to 0, indicating the length of the field that
+
+ is stored on the current page. The format looks like this:
+
+ <img alt="" src="field-header-overflow.png">
+
+ overflowPage will be written as compressed long, overflowId
+
+ will be written as compressed Int</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">NONEXISTENT</td>
+ <td colspan="1" rowspan="1">the field no longer exists, e.g. column has been dropped
+
+ during an alter table</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">EXTENSIBLE</td>
+ <td colspan="1" rowspan="1">the field is of user defined data type. The field may
+
+ be tagged.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">TAGGED</td>
+ <td colspan="1" rowspan="1">the field is TAGGED if and only if it is EXTENSIBLE.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">FIXED</td>
+ <td colspan="1" rowspan="1">the field is FIXED if and only if it is used in the
+
+ log records for version 1.2 and higher.</td>
+
+</tr>
+
+</table>
+
+</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">fieldDataLength</td>
+ <td colspan="1" rowspan="1"> The fieldDataLength is only set if the field is not NULL. It
+
+ is the length of the field that is stored on the current page.
+
+ The fieldDataLength is a variable length CompressedInt. </td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">fieldData</td>
+ <td colspan="1" rowspan="1">
+
+<p> Overflow page and overflow id are stored as field data. If
+
+ the overflow bit in status is set, the field data is the overflow
+
+ information. When the overflow bit is not set in status, then,
+
+ fieldData is the actual user data for the field. That means,
+
+ field header consists only of field status and field data length.
+
+ <br>
+
+ A non-overflow field:
+
+ <br>
+<img alt="" src="field-header-non-overflow.png"><br>
+
+ An overflow field:
+
+ <br>
+<img alt="" src="field-header-overflow.png"><br>
+<strong>overflowPage
+
+ and overflowID</strong>
+<br>
+
+ The overflowPage is a variable length CompressedLong, overflowID
+
+ is a variable Length CompressedInt. They are only stored when
+
+ the field state is OVERFLOW. And they are not stored in the field
+
+ header. Instead, they are stored at the end of the field data.
+
+ The reason we do that is to save a copy if the field has to overflow. </p>
+
+</td>
+
+</tr>
+
+</table>
+<div class="note">
+<div class="label">Long Columns</div>
+<div class="content"> A column is long if it can't fit on a single page. A long column
+
+ is marked as long in the base row, and its field contains a pointer
+
+ to a chain of other rows in the same container which contain the data
+
+ of the row. Each of the subsequent rows is on a page to itself. Each
+
+ subsequent row, except for the last piece, has 2 columns, the first
+
+ is the next segment of the row and the second is the pointer to the
+
+ following segment. The last segment only has the data segment.
+
+ </div>
+</div>
+<a name="N102C3"></a><a name="slottable"></a>
+<h3 class="boxed">Slot Offset Table</h3>
+<p>The slot offset table is a table of 6 or 12 bytes per record, depending
+
+ on the pageSize being less or greater than 64K: </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<caption>Slot Table Record</caption>
+
+<tr>
+
+<th colspan="1" rowspan="1">Size</th>
+ <th colspan="1" rowspan="1">Content</th>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+ <td colspan="1" rowspan="1">page offset for the record that is assigned to the slot</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+ <td colspan="1" rowspan="1">the length of the record on this page.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+ <td colspan="1" rowspan="1">the length of the reserved number of bytes for this record on
+
+ this page.</td>
+
+</tr>
+
+</table>
+<p>
+
+ First slot is slot 0. The slot table grows backwards. Slots are never
+
+ left empty. </p>
+<a name="N1030A"></a><a name="checksum"></a>
+<h3 class="boxed">Checksum</h3>
+<p>8 bytes of a java.util.zip.CRC32 checksum of the entire page's contents
+
+ without the 8 bytes representing the checksum.</p>
+</div>
+
+<a name="N10315"></a><a name="allocpage"></a>
+<h2 class="boxed">Allocation Page</h2>
+<div class="section">
+<p> An allocation page of the file container extends a normal Stored page,
+
+ with the exception that a hunk of space may be 'borrowed' by the file
+
+ container to store the file header.</p>
+<p> The borrowed space is not visible to the alloc page even though it is
+
+ present in the page data array. It is accessed directly by the FileContainer.
+
+ Any change made to the borrowed space is not managed or seen by the allocation
+
+ page.</p>
+<p> The reason for having this borrowed space is so that the container header
+
+ does not need to have a page of its own. </p>
+<p>
+
+<strong>Page Format</strong>
+
+<br>
+
+ An allocation page extends a stored page, the on disk format is different
+
+ from a stored page in that N bytes are 'borrowed' by the container and
+
+ the page header of an allocation page will be slightly bigger than a normal
+
+ stored page. These N bytes are stored between the page header and the record
+
+ space.</p>
+<p> The reason why these N bytes can't simply be a row is because they need
+
+ to be statically accessible by the container object to avoid a chicken
+
+ and egg problem of the container object needing to instantiate an alloc
+
+ page object before it can be objectified, and an alloc page object needing
+
+ to instantiate a container object before it can be objectified. So these

+ N bytes must be stored outside of the normal record interface yet they must
+
+ be settable because only the first alloc page has this borrowed space.
+
+ Other (non-first) alloc pages have N == 0.
+
+ <br>
+<img alt="" src="alloc-page.png"></p>
+<p>
+
+ N is a byte that indicates the size of the borrowed space. Once an alloc
+
+ page is initialized, the value of N cannot change.
+
+ </p>
+<p>
+
+ The maximum space that can be borrowed by the container is 256 bytes.
+
+ </p>
+<p>
+
+ The allocation pages are of the same page size as any other pages in the
+
+ container. The first allocation page of the FileContainer starts at the
+
+ first physical byte of the container. Subsequent allocation pages are
+
+ chained via the nextAllocPageOffset. Each allocation page is expected to
+
+ manage at least 1000 user pages (for 1K page size) so this chaining may not
+
+ be a severe performance hit. The logical -> physical mapping of an
+
+ allocation page is stored in the previous allocation page. The container
+
+ object will need to maintain this mapping.</p>
+<p>
+
+ The following fields are stored in the page header:
+
+ </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<caption>
+
+ Format of Alloc Page
+
+ </caption>
+
+<tr>
+
+<th colspan="1" rowspan="1">
+
+ Type
+
+ </th>
+ <th colspan="1" rowspan="1">
+
+ Description
+
+ </th>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">
+
+ int
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ FormatId (Although 4 bytes are allocated, this uses only the first 2 bytes. Next 2 bytes are unused.)
+
+ </td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">StoredPageHeader</td>
+ <td colspan="1" rowspan="1">see <a href="#pageheader">Stored Page Header</a></td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">nextAllocPageNumber - the next allocation page's number</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">nextAllocPageOffset - the file offset of the next allocation page</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved1 - reserved for future usage</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved2 - reserved for future usage</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved3 - reserved for future usage</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved4 - reserved for future usage</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">byte</td>
+ <td colspan="1" rowspan="1">N - the size of the borrowed container info</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">byte[N]</td>
+ <td colspan="1" rowspan="1">containerInfo - the content of the borrowed container info</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">AllocExtent</td>
+ <td colspan="1" rowspan="1">The one and only extent on this alloc page.</td>
+
+</tr>
+
+</table>
+<p>
+
+ The allocation page contains allocation extent rows. In this first cut
+
+ implementation, there is only 1 allocation extent row per allocation page.
+
+ </p>
+<p>
+
+ The allocation extent row is an externalizable object and is directly
+
+ written on to the page by the alloc page. In other words, it will not be
+
+ converted in to a storeableRow. This is to cut down overhead, enhance
+
+ performance and give more control of the size and layout of the allocation
+
+ extent row to the alloc page.
+
+ </p>
+<a name="N103EA"></a><a name="Alloc+Page+detailed+implementation+notes"></a>
+<h3 class="boxed">
+
+ Alloc Page detailed implementation notes</h3>
+<p>
+
+ Create Container - an embryonic allocation page is formatted on disk by a
+
+ special static function to avoid instantiating a full AllocPage object.
+
+ This embryonic allocation has enough information that it can find the
+
+ file header and not much else. Then the allocation page is properly
+
+ initialized by creating the first extent.
+
+ </p>
+<p>
+
+ Open Container - A static AllocPage method will be used to read off the
+
+ container information directly from disk. Even if
+
+ the first alloc page (page 0) is already in the page cache, it will not be
+
+ used because cleaning the alloc page will introduce a deadlock if the
+
+ container is not in the container cache. Long term, the first alloc page
+
+ should probably live in the container cache rather than in the page cache.
+
+ </p>
+<p>
+
+ Get Page - The first alloc page (page 0) will be read into the page cache.
+
+ Continue to follow the alloc page chain until the alloc page that manages
+
+ the specified page is found. From the alloc page, the physical offset of
+
+ the specified page is located.
+
+ </p>
+<p>
+
+ Cleaning alloc page - the alloc page is written out the same way any page
+
+ is written out. The container object will provide a call back to the alloc
+
+ page to write the current version of the container object back into the
+
+ borrowed space before the alloc page itself is written out.
+
+ </p>
+<p>
+
+ Cleaning the container object - get the first alloc page, dirty it and
+
+ clean it (which will cause it to call the container object to write itself
+
+ out into the borrowed space). The versioning of the container is
+
+ independent of the versioning of the alloc page. The container version is
+
+ stored inside the borrowed space and is opaque to the alloc page.
+
+ </p>
+<p>For the fields in an allocation extent row, see the Allocation Extent section below.</p>
+</div>
+
+<a name="N10404"></a><a name="Allocation+Extent"></a>
+<h2 class="boxed">Allocation Extent</h2>
+<div class="section">
+<p>
+
+ An allocation extent row manages the page status of the pages in the extent.
+
+ AllocExtent is externalizable and is written to the AllocPage directly,
+
+ without being converted to a row first.
+
+ </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<caption>Format of Allocation Extent</caption>
+
+<tr>
+
+<th colspan="1" rowspan="1">Type</th>
+ <th colspan="1" rowspan="1">Description</th>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">extentOffset - the beginning physical byte offset of the first page of this extent</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">extentStart - the first logical page managed by this extent.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">extentEnd - the last page this extent can ever hope to manage.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">int</td>
+ <td colspan="1" rowspan="1">extentLength - the number of pages allocated in this extent</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">int</td>
+ <td colspan="1" rowspan="1">
+
+<p>extentStatus - status bits for the whole extent.
+
+ <br>HAS_DEALLOCATED - most likely, this extent has a deallocated
+
+ page somewhere. If !HAS_DEALLOCATED, the extent has no deallocated page.
+
+ <br>HAS_FREE - most likely, this extent has a free page somewhere.
+
+ If !HAS_FREE, there is no free page in the extent.
+
+ <br>ALL_FREE - most likely, this extent only has free pages, good
+
+ candidate for shrinking the file.
+
+ If !ALL_FREE, the extent is not all free.
+
+ <br>HAS_UNFILLED_PAGES - most likely, this extent has unfilled pages.
+
+ if !HAS_UNFILLED_PAGES, all pages are filled.
+
+ <br>KEEP_UNFILLED_PAGES - this extent keeps track of unfilled pages
+
+ (post v1.3). If not set, this extent has no notion of
+
+ unfilled page and has no unFilledPage bitmap.
+
+ <br>NO_DEALLOC_PAGE_MAP - this extent does not have separate dealloc and
+
+ free page bit maps. Prior to 2.0, there were 2 bit
+
+ maps, a deallocate page bit map and a free page bit
+
+ map. Cloudscape 2.0 and later merged the dealloc page
+
+ bit map into the free page bit map.
+
+ <br>RETIRED - this extent contains only 'retired' pages, never use
+
+ any page from this extent. The pages don't actually
+
+ exist, i.e., it maps to nothing (physicalOffset is
+
+ garbage). The purpose of this extent is to blot out a
+
+ range of logical page numbers that no longer exists
+
+ for this container. Use this to reuse a physical page
+
+ when a logical page has exhausted all recordId or for
+
+ logical pages that have been shrunk out.
+
+ </p>
+
+</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">int</td>
+ <td colspan="1" rowspan="1">preAllocLength - the number of pages that have been preallocated</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">int</td>
+ <td colspan="1" rowspan="1">reserved1</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved2 - reserved for future use</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">long</td>
+ <td colspan="1" rowspan="1">reserved3 - reserved for future use</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">FreePages(bit)</td>
+ <td colspan="1" rowspan="1">Bitmap of free pages. Bit[i] is ON if page is free for immediate (re)use.</td>
+
+</tr>
+
+<tr>
+
+<td colspan="1" rowspan="1">unFilledPages(bit)</td>
+ <td colspan="1" rowspan="1">Bitmap of pages that have free space. Bit[i] is ON if page is likely to be < 1/2 full.</td>
+
+</tr>
+
+</table>
+<p>
+
+ org.apache.derby.iapi.services.io.FormatableBitSet is used to store the bit map.
+
+ FormatableBitSet is an externalizable class.
+
+ </p>
+<p>
+
+ A page can have the following logical state:
+
+ <br>Free - a page that is free to be used
+
+ <br>Valid - a page that is currently in use
+
+ </p>
+<p>
+
+ There is another, transitional type of page: pages that have been
+
+ allocated on disk but have not yet been used. These pages are Free.
+
+ </p>
+<p>
+
+ Bit[K] freePages
+
+ Bit[i] is ON iff page i may be free for reuse. User must get the
+
+ dealloc page lock on the free page to make sure the transaction.
+
+ </p>
+<p>
+
+ K is the size of the bit array, it must be >= length.
+
+ </p>
+</div>
+
+</div>
+<!--+
+ |end content
+ +-->
+<div class="clearboth"> </div>
+</div>
+<div id="footer">
+<!--+
+ |start bottomstrip
+ +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+// --></script>
+</div>
+<div class="copyright">
+ Copyright ©
+ 2004-2012 Apache Software Foundation</div>
+<div id="feedback">
+ Send feedback about the website to:
+ <a id="feedbackto" href="mailto:derby-user@db.apache.org?subject=Feedback%C2%A0papers/pageformats.html">derby-user@db.apache.org</a>
+</div>
+<!--+
+ |end bottomstrip
+ +-->
+</div>
+</body>
+</html>
Added: websites/production/db/content/derby/papers/recovery.html
==============================================================================
--- websites/production/db/content/derby/papers/recovery.html (added)
+++ websites/production/db/content/derby/papers/recovery.html Wed Dec 19 18:20:21 2012
@@ -0,0 +1,918 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Derby Logging and Recovery</title>
+<link type="text/css" href="../skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="../skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="../skin/print.css" rel="stylesheet">
+<link type="text/css" href="../skin/profile.css" rel="stylesheet">
+<script src="../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="../">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+ |breadtrail
+ +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">apache</a> > <a href="http://db.apache.org/">db</a><script src="../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+ |header
+ +-->
+<div class="header">
+<!--+
+ |start group logo
+ +-->
+<div class="grouplogo">
+<a href="http://db.apache.org/derby"><img class="logoImage" alt="Apache Derby" src="../images/derby-logo-web.png" title="Derby is a zero-admin Java RDBMS"></a>
+</div>
+<!--+
+ |end group logo
+ +-->
+<!--+
+ |start Project Logo
+ +-->
+<div class="projectlogoA1">
+<a href="http://db.apache.org"><img class="logoImage" alt="Apache DB Project" src="../images/db-logo-white.png" title="Apache DB creates and maintains database solutions."></a>
+</div>
+<!--+
+ |end Project Logo
+ +-->
+<!--+
+ |start Tabs
+ +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="../index.html">Home</a>
+</li>
+<li>
+<a class="unselected" href="../quick_start.html">Quick Start</a>
+</li>
+<li>
+<a class="unselected" href="../derby_downloads.html">Download</a>
+</li>
+<li>
+<a class="unselected" href="../derby_comm.html">Community</a>
+</li>
+<li>
+<a class="unselected" href="../manuals/index.html">Documentation</a>
+</li>
+<li class="current">
+<a class="selected" href="../blogs/index.html">Resources</a>
+</li>
+</ul>
+<!--+
+ |end Tabs
+ +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+ |start Subtabs
+ +-->
+<div id="level2tabs"></div>
+<!--+
+ |end Endtabs
+ +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+// --></script>
+</div>
+<!--+
+ |breadtrail
+ +-->
+<div class="breadtrail">
+
+
+ </div>
+<!--+
+ |start Menu, mainarea
+ +-->
+<!--+
+ |start Menu
+ +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', '../skin/')" id="menu_1.1Title" class="menutitle">Blogs and Articles About Derby</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#blogs">Blogs</a>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3', '../skin/')" id="menu_1.1.3Title" class="menutitle">Articles</div>
+<div id="menu_1.1.3" class="menuitemgroup">
+<div onclick="SwitchMenu('menu_1.1.3.1', '../skin/')" id="menu_1.1.3.1Title" class="menutitle">Tutorials, Tips and Tuning</div>
+<div id="menu_1.1.3.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#getstarted">Getting Started</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#features">Features, Hints and Tips</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#security">Security</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#performance">Performance and Tuning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.2', '../skin/')" id="menu_1.1.3.2Title" class="menutitle">Tools and Migration</div>
+<div id="menu_1.1.3.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#migration">Migration</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.3', '../skin/')" id="menu_1.1.3.3Title" class="menutitle">Applications</div>
+<div id="menu_1.1.3.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#client">Client</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#middletier">Middle Tier</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#persistence">Persistence</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#scalability">Scalability and Failover</a>
+</div>
+</div>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.2', '../skin/')" id="menu_1.2Title" class="menutitle">Integration With Other Products</div>
+<div id="menu_1.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#uses">What works with Derby?</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#products">Product Writeups</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', '../skin/')" id="menu_1.3Title" class="menutitle">Eclipse Plug-ins</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/derby_plugin_info.html">Info</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4', '../skin/')" id="menu_selected_1.4Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Papers and Presentations</div>
+<div id="menu_selected_1.4" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="../papers/index.html">Overview</a>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4.2', '../skin/')" id="menu_selected_1.4.2Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Derby Engine</div>
+<div id="menu_selected_1.4.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_1.4.2.1', '../skin/')" id="menu_1.4.2.1Title" class="menutitle">Javadoc</div>
+<div id="menu_1.4.2.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine">Engine</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/language">Language</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/publishedapi">API</a>
+</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_arch.html">Architecture</a>
+</div>
+<div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
+<a href="../papers/pageformats.html">Disk Page Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_htw.html">How Things Work</a>
+</div>
+<div class="menuitem">
+<a href="../papers/Intersect-design.html">Intersect & Except</a>
+</div>
+<div class="menuitem">
+<a href="../papers/JDBCImplementation.html">JDBC</a>
+</div>
+<div class="menuitem">
+<a href="../papers/logformats.html">Log Format</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Logging & Recovery</div>
+</div>
+<div class="menuitem">
+<a href="../papers/optimizer.html">Optimizer</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/types/package-summary.html#package_description">Type System</a>
+</div>
+<div class="menuitem">
+<a href="../papers/versionupgrade.html">Versioning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.3', '../skin/')" id="menu_1.4.3Title" class="menutitle">Derby Network Client</div>
+<div id="menu_1.4.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyClientSpec.html">Functional Spec</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.4', '../skin/')" id="menu_1.4.4Title" class="menutitle">Derby Tutorial</div>
+<div id="menu_1.4.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyTut/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/install_software.html">Step 1: Install Software</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ij_intro.html">Step 2: ij Basics</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/embedded_intro.html">Step 3: Embedded Derby</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ns_intro.html">Step 4: Derby Network Server</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.5', '../skin/')" id="menu_1.4.5Title" class="menutitle">Presentations</div>
+<div id="menu_1.4.5" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/ApacheCon.html">ApacheCon</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Victorian+Java+User+Group">Victorian JUG 2008</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#OSCON+2005">OSCON 2005</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Colorado+Software+Summit+2004">Colorado 2004</a>
+</div>
+</div>
+</div>
+<!--+
+ |start Search
+ +-->
+<div class="searchbox">
+<hr>
+<form action="http://www.google.com/search" method="get">
+<input value="db.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="18" name="q" id="query" type="text" value="Search the site with google">
+ <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+ |end search
+ +-->
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+ |alternative credits
+ +-->
+<div id="credit2"></div>
+</div>
+<!--+
+ |end Menu
+ +-->
+<!--+
+ |start content
+ +-->
+<div id="content">
+<div class="trail">Font size:
+ <input value="Reset" class="resetfont" title="Reset text" onclick="ndeSetTextSize('reset'); return false;" type="button">
+ <input value="-a" class="smallerfont" title="Shrink text" onclick="ndeSetTextSize('decr'); return false;" type="button">
+ <input value="+a" class="biggerfont" title="Enlarge text" onclick="ndeSetTextSize('incr'); return false;" type="button">
+</div>
+<h1>Derby Logging and Recovery</h1>
+<div class="abstract">This document describes how Derby implements logging and recovery.
+ This is a work-in-progress derived from Javadoc comments and from explanations
+ Mike Matrigali and others posted to the Derby lists. Please post questions,
+ comments, and corrections to derby-dev@db.apache.org. </div>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#introduction"> Introduction </a>
+</li>
+<li>
+<a href="#ARIES+-+An+Overview">ARIES - An Overview</a>
+</li>
+<li>
+<a href="#Features+of+ARIES">Features of ARIES</a>
+</li>
+<li>
+<a href="#References">References</a>
+</li>
+<li>
+<a href="#Derby+implementation+of+ARIES">Derby implementation of ARIES</a>
+</li>
+<li>
+<a href="#Derby+recovery+process">Derby recovery process</a>
+</li>
+<li>
+<a href="#Recovery+Redo+pass">Recovery Redo pass</a>
+</li>
+<li>
+<a href="#Recovery+Undo+pass">Recovery Undo pass</a>
+</li>
+<li>
+<a href="#Checkpoints">Checkpoints</a>
+</li>
+<li>
+<a href="#Derby+Logging+Overview">Derby Logging Overview</a>
+</li>
+<li>
+<a href="#Loggable+Interface+Hierarchy">Loggable Interface Hierarchy</a>
+</li>
+<li>
+<a href="#Container+Log+Operations+Hierarchy">Container Log Operations Hierarchy</a>
+</li>
+<li>
+<a href="#Transaction+Management+Log+Operations+Hierarchy">Transaction Management Log Operations Hierarchy</a>
+</li>
+<li>
+<a href="#Page+Level+Log+Operations+Hierarchy">Page Level Log Operations Hierarchy</a>
+</li>
+</ul>
+</div>
+
+<a name="N10010"></a><a name="introduction"></a>
+<h2 class="boxed"> Introduction </h2>
+<div class="section">
+<p>Derby transaction logging and recovery is based upon the ARIES algorithm.</p>
+</div>
+
+<a name="N1001A"></a><a name="ARIES+-+An+Overview"></a>
+<h2 class="boxed">ARIES - An Overview</h2>
+<div class="section">
+<p>Following is a brief description of the main principles behind ARIES.</p>
+<p>Firstly, in ARIES, changes always take the system forward. That is to say,
+even transaction rollbacks are treated as if they are updates to the system.
+This is counter-intuitive to what the user thinks, because when a user asks for a
+transaction to be rolled back, they assume that the system is going back
+to a previous state of affairs. However, from the perspective of ARIES, there
+is no such thing as going back. For example, if a transaction changes A to B
+and then rolls back, ARIES treats the rollback as simply an update that
+changes B to A. The forward change from A to B (redo) and the reversal of B to
+A (undo) are both recorded as updates to the system. Changes during normal
+operations are recorded as Redo-Undo log records. As the name implies, these
+log records can be 'redone' in case of a system crash, or 'undone' in case a
+rollback is required. Changes made during rollbacks, however, are recorded as
+Redo-only log records. These log records are called Compensation Log Records
+(CLRs). The reason these are redo only is that by definition a rollback does
+not need to be undone, whereas normal updates need to be undone if the
+transaction decides to rollback.
+</p>
+<p>The second basic principle of ARIES is that during recovery, history
+ is repeated. This can be explained as follows.</p>
+<p>When a system crashes, there would be some transactions that have completed
+ (committed or aborted), and others that are still active. The WAL protocol
+ ensures that changes made by completed transactions have been recorded
+ in the Log. Changes made by incomplete transactions may also be present
+ in the Log, because Log Records are created in the same order as the changes
+ are made by the system.</p>
+<p>During recovery, ARIES initially replays the Log to bring the system
+ back to a state close to that when the crash occurred. This means that
+ ARIES replays the effects of not only those transactions that committed
+ or aborted, but also those that were active at the time of the crash.
+ Having brought the system to this state, ARIES then identifies transactions
+ that were incomplete, and rolls them back. The basic idea is to repeat
+ the entire history up to the point of crash, and then undo failed transactions.</p>
+<p>This approach has the advantage that during the redo phase, changes can
+ be replayed at a fairly low level, for example, the level of a disk page.
+ ARIES calls this page oriented redo. This feature is significant because
+ it means that until the redo phase is over, the system does not need to
+ know about higher level data structures such as Indexes. Only during the
+ undo phase, when incomplete transactions are being rolled back, does the
+ system need to know about high level data structures. </p>
+</div>
+
+<a name="N10033"></a><a name="Features+of+ARIES"></a>
+<h2 class="boxed">Features of ARIES</h2>
+<div class="section">
+<p>ARIES includes a number of optimisations to reduce the amount of work
+ required during normal operations and recovery.</p>
+<p>One optimisation is to avoid application of log records unnecessarily.
+ The LSN of the most recently generated log record is stored in each disk
+ page. This is known as the PageLsn. The PageLsn allows ARIES to determine
+ during the redo phase, whether the changes represented by a log record
+ have been applied to the page or not.</p>
+<p>ARIES chains log records for transactions in such a way that those records
+ that are no longer necessary, are skipped during recovery. For example,
+ if a transaction changed A to B, and then rolled back, generating a log
+ record for changing B to A, then during recovery, ARIES would automatically
+ skip the log record that represents the change from A to B. This is made
+ possible by maintaining an UndoLsn pointer in every Log Record. The UndoLsn
+ normally points to the previous log record generated by the transaction.
+ However, in log records generated during Rollback (known as Compensation
+ Log Records), the UndoLsn is made to point to the Log record preceding
+ the one that is being undone. To take an example, let us assume that a
+ transaction generated log record 1, containing change from A to B, then
+ log record 2 containing change from B to C. At this point the transaction
+ decides to rollback the change from B to C. It therefore generates a new
+ log record 3, containing a change from C to B. The UndoLsn of this log
+ record is made to point at log record 1, instead of log record 2. When
+ following the UndoLsn chain, ARIES would skip log record 2.</p>
+<p>ARIES also supports efficient checkpoints. During a checkpoint, it is
+ not necessary to flush all database pages to disk. Instead ARIES records
+ a list of dirty buffer pages along with their RecoveryLsn(s). The RecoveryLsn
+ of a page is the LSN of the earliest log record that represents a change
+ to the page since it was read from disk. By using this list, ARIES is
+ able to determine during recovery, where to start replaying the Log.</p>
+<p>ARIES supports the nested top-level action concept whereby part of a transaction
+ can be committed even if the transaction aborts. This is useful for situations
+ where a structural change should not be undone even if the transaction
+ aborts. Nested top level actions are implemented using Dummy Compensation
+ Log Records - and make use of the ability to skip log records using the
+ UndoLsn pointer as described previously.</p>
+</div>
+
+<a name="N10049"></a><a name="References"></a>
+<h2 class="boxed">References</h2>
+<div class="section">
+<ol>
+
+<li>
+<p> For a full description of ARIES, please see
+ <em>Mohan, C., Haderle, D., Lindsay, B., Pirahesh, H., Schwarz, P. ARIES:
+ A Transaction Recovery Method Supporting Fine-Granularity Locking and
+ Partial Rollbacks Using Write-Ahead Logging, ACM Transactions on Database
+ Systems, Vol. 17, No. 1, March 1992, pp94-162.</em>
+ A version of this document is freely available as
+ <a class="external" href="http://www.almaden.ibm.com/u/mohan/RJ6649Rev.pdf">IBM Research
+ Report RJ6649</a>.</p>
+
+</li>
+
+<li>
+<p> A good description of Write Ahead Logging, and how a log is typically
+ implemented, can be found in
+ <em>
+ <a class="external" href="http://portal.acm.org/citation.cfm?id=573304">Transaction
+ Processing: Concepts and Techniques</a>
+ , by Jim Gray and Andreas Reuter, 1993, Morgan Kaufmann Publishers</em>
+ .</p>
+
+</li>
+
+</ol>
+</div>
+
+<a name="N1006B"></a><a name="Derby+implementation+of+ARIES"></a>
+<h2 class="boxed">Derby implementation of ARIES</h2>
+<div class="section">
+<p>I shall only describe how Derby differs from standard ARIES implementation.
+ Therefore, for a full understanding of the logging and recovery mechanisms
+ in Derby, it is necessary to consult the above-mentioned papers on ARIES.</p>
+<p>Derby uses Log Sequence Numbers to identify Log records. In Derby terminology,
+ LSNs are called LogInstants. LogCounter is an implementation of LogInstant.</p>
+<p>Although Derby uses LogInstant, it does not save this with the page data.
+ Instead, a page version number is stored. The page version number is also
+ stored in the log records associated with the page. During recovery (redo),
+ Derby uses the page version to determine whether the page needs redo or
+ not. Here is a comment on the rationale behind this:</p>
+<p class="quote">
+
+<em>Mike Matrigali:</em>
+
+<br>
+ Am going to defer on page version vs. LSN question, but at least mention
+ some guesses, not researched info. You are right about what exists. I spoke
+ with some colleagues and the best we can come up with is that the implementor
+ wanted to separate the page and the log, in case we ever did a different
+ log format. I will try to do some more research here. I also vaguely remember
+ the implementor mentioning if we ever wanted to implement the LSN on the
+ page, we had space to do so. It may simply have been easier to code the
+ page versions, since in the current system the LSN is the address in the
+ log file (which and it may not be available when the code wants to write
+ it on the page).
+ <br>
+ As you say in derby all the log records are associated with a page, and
+ thus have a page version number. That page version number in the log is
+ compared with the page version number of the page during redo to determine
+ if the log record needs to be applied. This also has helped us in the
+ past to catch some bugs as we can sanity check during redo that the page
+ is progressing along the expected path, ie. it is a bug during redo to
+ be applying a page version 10 log record to page that is at page version
+ 8. I haven't seen this sanity check in many years, but was useful when
+ the product was first coded. </p>
+<p>Derby does not write the dirty pages list within a Checkpoint record.
+ Instead, during checkpoint, Derby flushes all database pages to
+ disk. The redo Low Water Mark (redoLWM) is set to the current LSN when the
+ checkpoint starts. The undo Low Water Mark (undoLWM) is set to the
+ starting LSN of the oldest active transaction. At restart, Derby replays
+ the log from redoLWM or undoLWM whichever is earlier. For a good description
+ of concepts behind the checkpoint method used by Derby, and the use of redo/undo Low
+ Water Marks, please refer to TPCT book (Section 11.3).</p>
+<p>Derby uses 'internal' transactions instead of nested top-level actions
+ to separate structural changes from normal operations. Internal transactions
+ have the property that they are always page-oriented and do not require
+ logical undo, ie, undo is always physical. Also, during recovery, incomplete
+ internal transactions
+ are undone before any regular transactions. In ARIES, no special processing
+ is required to handle this, as nested top-level actions are automatically
+ handled as part of normal redo, and are skipped during undo unless they
+ are incomplete, in which case they are undone.</p>
+<p>ARIES uses three passes during recovery. The first pass is the analysis
+ pass when ARIES collects information and determines where redo must start.
+ This is followed by the redo pass, and then by the undo pass. Derby omits
+ the analysis pass as this is not required due to the way checkpoints are
+ done.</p>
+</div>
+
+<a name="N1008F"></a><a name="Derby+recovery+process"></a>
+<h2 class="boxed">Derby recovery process</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.LogToFile.recover()</span>
+</p>
+<p>Following is a high-level description of the recovery process in Derby.</p>
+<p> In this implementation, the log is a stream of log records stored in
+ one or more flat files. Recovery is done in 2 passes: redo and undo. </p>
+<dl>
+
+<dt>Redo pass </dt>
+
+<dd> In the redo pass, reconstruct the state of the rawstore by repeating
+ exactly what happened before as recorded in the log. </dd>
+
+<dt>Undo pass </dt>
+
+<dd> In the undo pass, all incomplete transactions are rolled back in
+ the order from the most recently started to the oldest.</dd>
+
+</dl>
+</div>
+
+<a name="N100B0"></a><a name="Recovery+Redo+pass"></a>
+<h2 class="boxed">Recovery Redo pass</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.FileLogger.redo()</span>
+</p>
+<p> The log stream is scanned from the beginning (or
+ from the undo low water mark of a checkpoint) forward until the end.
+ The purpose of the redo pass is to repeat history, i.e., to repeat
+ exactly the same set of changes the rawStore went through right before it
+ stopped. With each log record that is encountered in the redo pass:</p>
+<ol>
+
+<li>if it isFirst(), then the transaction factory is called upon to
+ create a new transaction object.</li>
+
+<li>if it needsRedo(), its doMe() is called (if it is a compensation
+ operation, then the undoable operation needs to be created first
+ before the doMe is called).</li>
+
+<li>if it isComplete(), then the transaction object is closed.</li>
+
+</ol>
+</div>
+
+<a name="N100CB"></a><a name="Recovery+Undo+pass"></a>
+<h2 class="boxed">Recovery Undo pass</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.xact.XactFactory.rollbackAllTransactions()</span>
+</p>
+<p> Roll back all active transactions that have updated the raw store.
+ Transactions are rolled back in the following order:</p>
+<ol>
+
+<li>Internal transactions in reversed beginXact chronological order</li>
+
+<li>all other transactions in reversed beginXact chronological order</li>
+
+</ol>
+</div>
+
+<a name="N100E3"></a><a name="Checkpoints"></a>
+<h2 class="boxed">Checkpoints</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.LogToFile.checkpoint()</span>
+</p>
+<p>Only one checkpoint may take place at any given time.</p>
+<p>The steps of a checkpoint are:</p>
+<ol>
+
+<li>
+<p>Switch to a new log file if possible.</p>
+
+<ol>
+
+<li>Freeze the log (for the transition to a new log file)</li>
+
+<li>Flush current log file</li>
+
+<li>Create and flush the new log file (with file number 1 higher
+ than the previous log file). The new log file becomes the
+ current log file.</li>
+
+<li>Unfreeze the log</li>
+
+</ol>
+
+</li>
+
+<li>Start checkpoint transaction</li>
+
+<li>
+<p>Gather interesting information about the rawStore: </p>
+
+<ol>
+
+<li>The current log instant (redoLWM)</li>
+
+<li>The earliest active transaction begin tran log record instant (undoLWM),
+ and all the truncation LWMs set by clients of raw store
+ (replication)</li>
+
+</ol>
+
+</li>
+
+<li>Clean the buffer cache</li>
+
+<li>Log the next checkpoint log record, which contains (repPoint,
+ undoLWM, redoLWM) and commit checkpoint transaction.</li>
+
+<li>Synchronously write the control file containing the next checkpoint
+ log record log instant</li>
+
+<li>The new checkpoint becomes the current checkpoint. Somewhere near
+ the beginning of each log file should be a checkpoint log record (not
+ guaranteed to be there)</li>
+
+<li>See if the log can be truncated</li>
+
+</ol>
+<p>The earliest useful log record is determined by the repPoint and the
+ undoLWM, whichever is earlier.</p>
+<p>Every log file whose log file number is smaller than the earliest useful
+ log record's log file number can be deleted.</p>
+<p>Transactions can be in the following states with respect to a checkpoint -
+ consider the log as a continuous stream and not as a series of log files for
+ the sake of clarity:<br>
+ <!-- <img src="checkpoint.png" alt=""/> -->
+
+</p>
+<pre class="code">
+|(BT)-------(ET)| marks the begin and end of a transaction.
+.                           checkpoint started
+.       |__undoLWM          |
+.       V                   |___redoLWM
+.                           |___TruncationLWM
+.                           |
+.                           V
+1 |-----------------|
+2       |--------------------------------|
+3           |-------|
+4               |--------------------------------------(end of log)
+5                                   |-^-|
+.                           Checkpoint Log Record
+---A--->|<-------B--------->|<-------------C-----------
+</pre>
+<p>
+ There are only 3 periods of interest :<br>
+ A) before undoLWM, B) between undo and redo LWM, C) after redoLWM.
+ </p>
+<p>
+ Transaction 1 started in A and terminates in B.<br>
+ During redo, we should only see log records and endXact from this
+ transaction in the first phase (between undoLWM and redoLWM). No
+ beginXact log record for this transaction will be seen.
+ </p>
+<p>
+ Transaction 2 started in B (right on the undoLWM) and terminated in C.
+ <br>
+ Any transaction that terminates in C must have a beginXact at or after
+ undoLWM. In other words, no transaction can span A, B and C. During redo,
+ we will see beginXact, other log records and endXact for this
+ transaction.
+ </p>
+<p>
+ Transaction 3 started in B and ended in B.<br>
+ During redo, we will see beginXact, other log records and endXact for
+ this transaction.
+ </p>
+<p>
+ Transaction 4 begins in B and never ends. <br>
+ During redo, we will see beginXact, other log records. In undo, this
+ loser transaction will be rolled back.
+ </p>
+<p>
+ Transaction 5 is the transaction taking the checkpoint. <br>
+ The checkpoint action started way back in time but the checkpoint log
+ record is only written after the buffer cache has been flushed.
+ </p>
+<p>
+ Note that if any time elapses between taking the undoLWM and the redoLWM,
+ then it will create a 4th period of interest.
+ </p>
+</div>
+
+<a name="N1015E"></a><a name="Derby+Logging+Overview"></a>
+<h2 class="boxed">Derby Logging Overview</h2>
+<div class="section">
+<p>A loggable action in Derby is redoable. If the action implements the Undoable interface, then it is also
+ undoable. When an undoable action is rolled back, it must generate a Compensation log which represents
+ the action necessary to repeat the undo.
+ </p>
+<p>Normally a logged action is rolled back on the same page that it was originally applied to. This is
+ called physical or physiological undo. If the undo needs to be applied to a different page (such as due to
+ a page split in a BTree), then it is called
+ a Logical Undo. In Derby, BTree inserts and deletes require logical undo.</p>
+<p>When performing a loggable action, Derby follows this sequence:</p>
+<ol>
+
+<li>Convert the action into a corresponding log operation. Most BTree and Heap operations are
+ translated to Page level actions, i.e., the action involves updating one or more pages. For example,
+ a single Heap row insert may be translated to inserts on several pages. Each page insert
+ will be a separate loggable action.</li>
+
+<li>Generate the log data that describes the page level action.</li>
+
+<li>Perform the action <em>after</em> it has been logged. Also, the action is
+ performed using the logged data, in the same way as it would be performed during recovery.
+ In other words, the logged data is used both for normal operations as well as for repeating
+ history. This has the advantage that the recovery execution path is the same as the execution
+ path during normal execution.</li>
+
+<li>If a transaction is being rolled back, first the loggable action is asked to generate
+ the corresponding undo (Compensation) log data. This is then logged, and after that it is performed.
+ As described before, a Compensation action is only redoable, because by definition, an undo
+ action does not need to be undone.
+ </li>
+
+</ol>
+</div>
+
+<a name="N10180"></a><a name="Loggable+Interface+Hierarchy"></a>
+<h2 class="boxed">Loggable Interface Hierarchy</h2>
+<div class="section">
+<ul>
+
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Loggable</strong></a>
+
+<ul>
+
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Compensation</strong></a>
+</li>
+
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Undoable</strong></a>
+
+<ul>
+
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/LogicalUndoable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>LogicalUndoable</strong></a>
+</li>
+
+</ul>
+
+</li>
+
+</ul>
+
+</li>
+
+</ul>
+</div>
+
+<a name="N101B2"></a><a name="Container+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Container Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerBasicOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerBasicOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)
+ <ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+
+</ul>
+
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/RemoveFileOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>RemoveFileOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)</li>
+
+</ul>
+</div>
+
+<a name="N101F7"></a><a name="Transaction+Management+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Transaction Management Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+
+<li>class org.apache.derby.impl.store.raw.xact.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/xact/BeginXact.html" title="class in org.apache.derby.impl.store.raw.xact"><strong>BeginXact</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+
+<li>class org.apache.derby.impl.store.raw.xact.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/xact/EndXact.html" title="class in org.apache.derby.impl.store.raw.xact"><strong>EndXact</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+
+<li>class org.apache.derby.impl.store.raw.log.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/log/CheckpointOperation.html" title="class in org.apache.derby.impl.store.raw.log"><strong>CheckpointOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+
+</ul>
+</div>
+
+<a name="N1022B"></a><a name="Page+Level+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Page Level Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PageBasicOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PageBasicOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>, org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/RePreparable.html" title="interface in org.apache.derby.iapi.store.raw">RePreparable</a>)
+ <ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/LogicalPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>LogicalPageOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/LogicalUndoable.html" title="interface in org.apache.derby.iapi.store.raw">LogicalUndoable</a>)
+ <ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/DeleteOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>DeleteOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InsertOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InsertOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/UpdateFieldOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>UpdateFieldOperation</strong></a>
+</li>
+
+</ul>
+
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/LogicalUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>LogicalUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PhysicalPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PhysicalPageOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)
+ <ul>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/AllocPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>AllocPageOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ChainAllocPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ChainAllocPageOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/CopyRowsOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>CopyRowsOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InitPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InitPageOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InvalidatePageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InvalidatePageOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PurgeOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PurgeOperation</strong></a>
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/UpdateOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>UpdateOperation</strong></a>
+</li>
+
+</ul>
+
+</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PhysicalUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PhysicalUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/SetReservedSpaceOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>SetReservedSpaceOperation</strong></a>
+</li>
+
+</ul>
+
+</li>
+
+</ul>
+</div>
+
+</div>
+<!--+
+ |end content
+ +-->
+<div class="clearboth"> </div>
+</div>
+<div id="footer">
+<!--+
+ |start bottomstrip
+ +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+// --></script>
+</div>
+<div class="copyright">
+ Copyright ©
+ 2004-2012 Apache Software Foundation</div>
+<div id="feedback">
+ Send feedback about the website to:
+ <a id="feedbackto" href="mailto:derby-user@db.apache.org?subject=Feedback%C2%A0papers/recovery.html">derby-user@db.apache.org</a>
+</div>
+<!--+
+ |end bottomstrip
+ +-->
+</div>
+</body>
+</html>