You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by jt...@apache.org on 2005/02/28 22:56:37 UTC

svn commit: r155705 - in incubator/derby/site/trunk: build/site/ build/site/papers/ build/site/releases/ src/documentation/content/papers/ src/documentation/content/xdocs/ src/documentation/content/xdocs/papers/

Author: jta
Date: Mon Feb 28 13:56:34 2005
New Revision: 155705

URL: http://svn.apache.org/viewcvs?view=rev&rev=155705
Log:
Added Jack Jack Klebanoff's Intersect-design.html

Added:
    incubator/derby/site/trunk/build/site/papers/Intersect-design.html   (with props)
    incubator/derby/site/trunk/src/documentation/content/papers/
    incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html   (with props)
Modified:
    incubator/derby/site/trunk/build/site/linkmap.html
    incubator/derby/site/trunk/build/site/papers/ApacheConUs04.html
    incubator/derby/site/trunk/build/site/papers/JDBCImplementation.html
    incubator/derby/site/trunk/build/site/papers/MiscPresentations.html
    incubator/derby/site/trunk/build/site/papers/derby_arch.html
    incubator/derby/site/trunk/build/site/papers/derby_htw.html
    incubator/derby/site/trunk/build/site/papers/derby_web.html
    incubator/derby/site/trunk/build/site/papers/fortune_tut.html
    incubator/derby/site/trunk/build/site/papers/index.html
    incubator/derby/site/trunk/build/site/papers/logformats.html
    incubator/derby/site/trunk/build/site/papers/pageformats.html
    incubator/derby/site/trunk/build/site/papers/recovery.html
    incubator/derby/site/trunk/build/site/releases/release-10.0.2.1.html
    incubator/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml
    incubator/derby/site/trunk/src/documentation/content/xdocs/site.xml

Modified: incubator/derby/site/trunk/build/site/linkmap.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/linkmap.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/linkmap.html (original)
+++ incubator/derby/site/trunk/build/site/linkmap.html Mon Feb 28 13:56:34 2005
@@ -165,6 +165,9 @@
 <a title="" href="papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">
@@ -398,6 +401,10 @@
       
 <li>
 <a href="papers/derby_htw.html">How Things Work</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>misc</em>
+</li>
+      
+<li>
+<a href="papers/Intersect-design.html">Intersect &amp; Except</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>intersect</em>
 </li>
       
 <li>

Modified: incubator/derby/site/trunk/build/site/papers/ApacheConUs04.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/ApacheConUs04.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/ApacheConUs04.html (original)
+++ incubator/derby/site/trunk/build/site/papers/ApacheConUs04.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Added: incubator/derby/site/trunk/build/site/papers/Intersect-design.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/Intersect-design.html?view=auto&rev=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/Intersect-design.html (added)
+++ incubator/derby/site/trunk/build/site/papers/Intersect-design.html Mon Feb 28 13:56:34 2005
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD><TITLE>Intersect & Except Design</TITLE></HEAD>
+<META content="text/html; charset=iso-8859-1" http-equiv=Content-Type>
+<CENTER>
+<H1>Intersect & Except Design</H1>
+Jack Klebanoff<br>
+Feb. 22 2005<br>
+</CENTER>
+<H2>Introduction</H2>
+<p>
+This paper describes the implementation of the INTERSECT and EXCEPT operators. This paper assumes
+basic familiarity with SQL and the language (compiler) portion of Derby.
+<p>
+The INTERSECT and EXCEPT operators operate on
+table expressions producing the intersection and difference, respectively. The syntax is (roughly):
+<p>
+<i>queryExpression</i> INTERSECT [ALL] <i>queryExpression</i><br>
+<i>queryExpression</i> EXCEPT [ALL] <i>queryExpression</i><br>
+<p>
+By default these operators remove duplicates, which can occur if there are duplicates in the
+inputs. If ALL is specified then duplicates are not removed. If t1 has m copies of row R and t2 has
+n copies then t1 INTERSECT ALL t2 returns min(m,n) copies of R, and t1 EXCEPT ALL t2 returns max( 0,
+m-n) copies of R.
+<p>
+The EXCEPT operator has the same precedence as UNION. INTERSECT has higher precedence.
+<p>
+The implementation is spread across several classes, primarily
+<ol>
+<li>SetOpResultSet, which handles <a href="#Execution">execution</a> of the INTERSECT and EXCEPT operators,
+<li>extensions to QueryTreeNode, which handle <a href="#Compilation">binding, optimization, and code generation</a>, and
+<li><a href="#Parser">the parser</a>.
+</ol>
+<H2><a name="Execution">Execution</a></H2>
+The INTERSECT and EXCEPT operations are executed in a similar fashion, by class SetOpResultSet. The two inputs are sorted
+separately. The sort key consists of all the columns. Then SetOpResultSet scans the two inputs
+simultaneously. The INTERSECT operation outputs approximately any row from its left input that is
+also found in its right output. The EXCEPT operation outputs approximately any row from its left
+input that is not found in its right output. Handling of duplicates complicates the picture a
+little, which is the reason for the "approximately" caveat. However the scans proceed in a strictly
+forward direction; there is no need for backtracking.
+<p>
+If the left and right inputs have N and M rows respectively the sorts take time O(N*log(N) +
+M*log(M)). The final scan takes time O(N + M). So the time for the whole operation is O(N*log(N) +
+M*log(M)).
+<H3>Alternative Execution Plans</H3>
+<p>
+Other implementations are possible.
+<ol>
+<li>
+INTERSECT and EXCEPT can be implemeted using hash tables. You can build a hash table on the right
+input. Then, if you somehow know that the rows in the left input are unique, you can scan through
+the rows of the left input and output each one that is found/not found in the hash table. If the size of the
+right input is known at the start then the hash table can be built in time O(M). If the size is not
+known ahead of time then the hash table can be built in time O(M*log(M)). If the hash function is
+good then the final scan step takes time O(N). Keeping track of duplicates slows things down. You
+can keep a hash table of output rows or mark found rows in the right input hash table.
+<p>
+Hash tables were rejected because, when the INTERSECT and EXCEPT operations were implemeted,
+BackingStoreHashtable did not spill to disk. A hash table implementation could exhaust memory.
+<li>
+If the right input is a base table with a unique index then we could forgo sorting the right input
+and use the index to find rows that match the left rows. Unless the left rows are known to be unique
+we must sort them or build a hash table to handle duplicates. Using a hash table to eliminate duplicates the time to perform the
+INTERSECT or EXCEPT would be O(N*log(M) + N'*log(N')), where N' is the number of output rows (N'
+&lt; N). So this is usually faster than the sort merge method, but it cannot always be used.
+</ol>
+<p>
+The current implementation was chosen because it always provides at least decent speed
+and memory utilization, and in many, though certainly not all cases, it is the best implementation.
+<p>
+We could
+have provided several implementations and let the optimizer choose the best, but this does not seem
+worthwhile for operations that are seldom used.
+<H2><a name="Compilation">Binding, Optimization, and Code Generation</a></H2>
+The IntersectOrExceptNode class handles compilation of both INTERSECT and EXCEPT operations, because
+they are implemented similarly. We do very little in the way of optimization because we have only
+implemented one execution plan; the optimizer has nothing to choose from.
+<p>
+The INTERSECT and EXCEPT operators are bound much like the UNION operator. The bind methods are all
+found in super class SetOperatorNode, which is shared with UnionNode.
+<p>
+IntersectOrExceptNode generates OrderByNodes for its inputs at the start of optimization, in the
+preprocess phase. Any column ordering can be used for the sorts, as long as the same one is used for
+both the left and right inputs. IntersectOrExceptNode tries to be a little clever about picking the
+column ordering for the sorts. If the INTERSECT or EXCEPT output must be ordered then
+IntersectOrExceptNode uses the ORDER BY columns as the most significant part of the sort key for its
+inputs. Any columns not in the ORDER BY list are tacked on to the least significant part of
+the sort keys of the inputs. This ensures that the output of the INTERSECT or EXCEPT will be
+properly ordered without an additional sort step.
+<p>
+The architecture of the Derby optimizer makes it difficult to do further optimizations. SelectNode
+processing requires that order by lists be pushed down to them at the start of preprocessing. If an
+input to INTERSECT or EXCEPT is a SELECT (a common case) then IntersectOrExceptNode has to decide
+whether it needs its inputs ordered before it calls the preprocess method of its inputs. That means
+that it must chose its execution plan at the start of the optimization process, not as the result of
+the optimization process.
+<p>
+Code generation is straighforward. It generates code that invokes the ResultSetFactory.getSetOpResultSet method.
+<H2><a name="Parser">Parser</a></H2>
+The parser, in java/engine/org/apache/derby/impl/sql/compile/sqlgrammar.jj, implements the EXCEPT,
+INTERSECT, and UNION operators in the queryExpression method.
+<p>
+The UNION and EXCEPT operators have the same precedence. The INTERSECT operator has higher
+precedence, so
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT <i>t2</i> UNION <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+(<i>t1</i> EXCEPT <i>t2</i>) UNION <i>t3</i>
+</BLOCKQUOTE>
+and
+<BLOCKQUOTE>
+<i>t1</i> UNION ALL <i>t2</i> UNION <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+(<i>t1</i> UNION ALL <i>t2</i>) UNION <i>t3</i>
+</BLOCKQUOTE>
+while
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT <i>t2</i> INTERSECT <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT (<i>t2</i> INTERSECT <i>t3</i>)
+</BLOCKQUOTE>
+<p>
+Note that the EXCEPT operator is not associative, nor is UNION ALL
+associative with UNION, so the correct associativity is important, even when the query expression
+uses the same operator.
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT (<i>t2</i> EXCEPT <i>t3</i>)
+</BLOCKQUOTE>
+is <b>not</b> equivalant to
+<BLOCKQUOTE>
+(<i>t1</i> EXCEPT <i>t2</i>) EXCEPT <i>t3</i>
+</BLOCKQUOTE>
+<p>
+This precedence and associativity is implemented in the structure of queryExpression. The higher
+precedence of the INTERSECT operator is implemented by building queryExpression out of
+nonJoinQueryTerm. A nonJoinQueryTerm consists of one or more nonJoinQueryPrimaries connected by
+INTERSECT operators. A queryExpression consists of one or more nonJoinQueryTerms connected by EXCEPT
+or UNION operators.
+<p>
+Our parser is a recursive descent parser. Recursive descent parsers want recursion on the
+right. Therefore they do not handle SQL's operator associativity naturally. The natural grammar for
+queryExpression is<br>
+<pre>
+<i>queryExpression</i> ::=
+  <i>nonJoinQueryTerm</i> |
+  <i>queryExpression</i> <b>UNION</b> [<b>ALL</b>] <i>nonJoinQueryTerm</i> |
+  <i>queryExpression</i> <b>EXCEPT</b> [<b>ALL</b>] <i>nonJoinQueryTerm</i>
+</pre>
+This captures the correct associativity of the UNION and EXCEPT operators, but we cannot use it
+because it has recursion on the left. Therefore our parser uses the following grammar:
+<pre>
+<code>
+ResultSetNode
+queryExpression(ResultSetNode leftSide, int operatorType) throws StandardException :
+{
+    ResultSetNode term;
+}
+{
+    term = nonJoinQueryTerm(leftSide, operatorType) [ term = unionOrExcept(term) ]
+    {
+        return term;
+    }
+}
+ResultSetNode
+unionOrExcept(ResultSetNode term) throws StandardException :
+{
+    ResultSetNode expression;
+    Token tok = null;
+}
+{
+    <UNION> [ tok = <ALL> ]
+        expression = queryExpression(term, (tok != null) ? UNION_ALL_OP : UNION_OP)
+    {
+        return expression;
+    }
+|
+    <EXCEPT> [ tok = <ALL> ]
+        expression = queryExpression(term, (tok != null) ? EXCEPT_ALL_OP : EXCEPT_OP)
+    {
+        return expression;
+    }
+}
+</code>
+</pre>
+We have right recursion, but each queryExpression has to know whether it is a simple expression or
+the right hand side of a UNION or INTERSECT operation. The nonJoinQueryTerm method uses a similar
+trick to implement SQL standard associativity while using right recursion. INTERSECT associativity matters
+when INTERSECT ALL is mixed with plain INTERSECT.
+<p>
+The creation of union, intersect, or except
+query tree nodes is done in the nonJoinQueryTerm method.
+
+<p>
+[<a href="papers/index.html">Back to Derby Papers</a>]
+</p>
+</html>

Propchange: incubator/derby/site/trunk/build/site/papers/Intersect-design.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/derby/site/trunk/build/site/papers/JDBCImplementation.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/JDBCImplementation.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/JDBCImplementation.html (original)
+++ incubator/derby/site/trunk/build/site/papers/JDBCImplementation.html Mon Feb 28 13:56:34 2005
@@ -128,6 +128,9 @@
 <div class="menuitem">
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
+<div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
 <div class="menupage">
 <div class="menupagetitle">JDBC</div>
 </div>

Modified: incubator/derby/site/trunk/build/site/papers/MiscPresentations.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/MiscPresentations.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/MiscPresentations.html (original)
+++ incubator/derby/site/trunk/build/site/papers/MiscPresentations.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/derby_arch.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/derby_arch.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/derby_arch.html (original)
+++ incubator/derby/site/trunk/build/site/papers/derby_arch.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/derby_htw.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/derby_htw.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/derby_htw.html (original)
+++ incubator/derby/site/trunk/build/site/papers/derby_htw.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <div class="menupagetitle">How Things Work</div>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/derby_web.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/derby_web.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/derby_web.html (original)
+++ incubator/derby/site/trunk/build/site/papers/derby_web.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/fortune_tut.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/fortune_tut.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/fortune_tut.html (original)
+++ incubator/derby/site/trunk/build/site/papers/fortune_tut.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/index.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/index.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/index.html (original)
+++ incubator/derby/site/trunk/build/site/papers/index.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">
@@ -215,6 +218,10 @@
 <li>
 <a href="derby_htw.html">How Things Work</a> 
 </li> 
+        
+<li>Intersect &amp; Except: <a href="Intersect-design.html">Intersect
+          &amp; Except Design</a>
+</li>
 	
 <li>JDBC: <a href="JDBCImplementation.html">Derby JDBC Implementation
 		Notes</a>
@@ -230,7 +237,7 @@
 </div>
 
 
-<a name="N10040"></a><a name="Presentations"></a>
+<a name="N10046"></a><a name="Presentations"></a>
 <h2 class="boxed">Presentations</h2>
 <div class="section">
 <ul>
@@ -289,7 +296,7 @@
 </div>
 
 
-<a name="N100A6"></a><a name="How+To%27s"></a>
+<a name="N100AC"></a><a name="How+To%27s"></a>
 <h2 class="boxed">How To's</h2>
 <div class="section">
 <p>
@@ -301,7 +308,7 @@
 </div>
 
 
-<a name="N100B4"></a><a name="How+to+Contribute+Papers"></a>
+<a name="N100BA"></a><a name="How+to+Contribute+Papers"></a>
 <h2 class="boxed">How to Contribute Papers</h2>
 <div class="section">
 <p>
@@ -374,7 +381,7 @@
 
 
 <p>
-<em>Last Updated: February 11, 2005</em>
+<em>Last Updated: February 28, 2005</em>
 </p>
 
 </div>

Modified: incubator/derby/site/trunk/build/site/papers/logformats.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/logformats.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/logformats.html (original)
+++ incubator/derby/site/trunk/build/site/papers/logformats.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menupage">

Modified: incubator/derby/site/trunk/build/site/papers/pageformats.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/pageformats.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/pageformats.html (original)
+++ incubator/derby/site/trunk/build/site/papers/pageformats.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/papers/recovery.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/papers/recovery.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/papers/recovery.html (original)
+++ incubator/derby/site/trunk/build/site/papers/recovery.html Mon Feb 28 13:56:34 2005
@@ -129,6 +129,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Modified: incubator/derby/site/trunk/build/site/releases/release-10.0.2.1.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/build/site/releases/release-10.0.2.1.html?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/build/site/releases/release-10.0.2.1.html (original)
+++ incubator/derby/site/trunk/build/site/releases/release-10.0.2.1.html Mon Feb 28 13:56:34 2005
@@ -165,6 +165,9 @@
 <a title="" href="../papers/derby_htw.html">How Things Work</a>
 </div>
 <div class="menuitem">
+<a title="" href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
 <a title="" href="../papers/JDBCImplementation.html">JDBC</a>
 </div>
 <div class="menuitem">

Added: incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html?view=auto&rev=155705
==============================================================================
--- incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html (added)
+++ incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html Mon Feb 28 13:56:34 2005
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD><TITLE>Intersect & Except Design</TITLE></HEAD>
+<META content="text/html; charset=iso-8859-1" http-equiv=Content-Type>
+<CENTER>
+<H1>Intersect & Except Design</H1>
+Jack Klebanoff<br>
+Feb. 22 2005<br>
+</CENTER>
+<H2>Introduction</H2>
+<p>
+This paper describes the implementation of the INTERSECT and EXCEPT operators. This paper assumes
+basic familiarity with SQL and the language (compiler) portion of Derby.
+<p>
+The INTERSECT and EXCEPT operators operate on
+table expressions producing the intersection and difference, respectively. The syntax is (roughly):
+<p>
+<i>queryExpression</i> INTERSECT [ALL] <i>queryExpression</i><br>
+<i>queryExpression</i> EXCEPT [ALL] <i>queryExpression</i><br>
+<p>
+By default these operators remove duplicates, which can occur if there are duplicates in the
+inputs. If ALL is specified then duplicates are not removed. If t1 has m copies of row R and t2 has
+n copies then t1 INTERSECT ALL t2 returns min(m,n) copies of R, and t1 EXCEPT ALL t2 returns max( 0,
+m-n) copies of R.
+<p>
+The EXCEPT operator has the same precedence as UNION. INTERSECT has higher precedence.
+<p>
+The implementation is spread across several classes, primarily
+<ol>
+<li>SetOpResultSet, which handles <a href="#Execution">execution</a> of the INTERSECT and EXCEPT operators,
+<li>extensions to QueryTreeNode, which handle <a href="#Compilation">binding, optimization, and code generation</a>, and
+<li><a href="#Parser">the parser</a>.
+</ol>
+<H2><a name="Execution">Execution</a></H2>
+The INTERSECT and EXCEPT operations are executed in a similar fashion, by class SetOpResultSet. The two inputs are sorted
+separately. The sort key consists of all the columns. Then SetOpResultSet scans the two inputs
+simultaneously. The INTERSECT operation outputs approximately any row from its left input that is
+also found in its right output. The EXCEPT operation outputs approximately any row from its left
+input that is not found in its right output. Handling of duplicates complicates the picture a
+little, which is the reason for the "approximately" caveat. However the scans proceed in a strictly
+forward direction; there is no need for backtracking.
+<p>
+If the left and right inputs have N and M rows respectively the sorts take time O(N*log(N) +
+M*log(M)). The final scan takes time O(N + M). So the time for the whole operation is O(N*log(N) +
+M*log(M)).
+<H3>Alternative Execution Plans</H3>
+<p>
+Other implementations are possible.
+<ol>
+<li>
+INTERSECT and EXCEPT can be implemeted using hash tables. You can build a hash table on the right
+input. Then, if you somehow know that the rows in the left input are unique, you can scan through
+the rows of the left input and output each one that is found/not found in the hash table. If the size of the
+right input is known at the start then the hash table can be built in time O(M). If the size is not
+known ahead of time then the hash table can be built in time O(M*log(M)). If the hash function is
+good then the final scan step takes time O(N). Keeping track of duplicates slows things down. You
+can keep a hash table of output rows or mark found rows in the right input hash table.
+<p>
+Hash tables were rejected because, when the INTERSECT and EXCEPT operations were implemeted,
+BackingStoreHashtable did not spill to disk. A hash table implementation could exhaust memory.
+<li>
+If the right input is a base table with a unique index then we could forgo sorting the right input
+and use the index to find rows that match the left rows. Unless the left rows are known to be unique
+we must sort them or build a hash table to handle duplicates. Using a hash table to eliminate duplicates the time to perform the
+INTERSECT or EXCEPT would be O(N*log(M) + N'*log(N')), where N' is the number of output rows (N'
+&lt; N). So this is usually faster than the sort merge method, but it cannot always be used.
+</ol>
+<p>
+The current implementation was chosen because it always provides at least decent speed
+and memory utilization, and in many, though certainly not all cases, it is the best implementation.
+<p>
+We could
+have provided several implementations and let the optimizer choose the best, but this does not seem
+worthwhile for operations that are seldom used.
+<H2><a name="Compilation">Binding, Optimization, and Code Generation</a></H2>
+The IntersectOrExceptNode class handles compilation of both INTERSECT and EXCEPT operations, because
+they are implemented similarly. We do very little in the way of optimization because we have only
+implemented one execution plan; the optimizer has nothing to choose from.
+<p>
+The INTERSECT and EXCEPT operators are bound much like the UNION operator. The bind methods are all
+found in super class SetOperatorNode, which is shared with UnionNode.
+<p>
+IntersectOrExceptNode generates OrderByNodes for its inputs at the start of optimization, in the
+preprocess phase. Any column ordering can be used for the sorts, as long as the same one is used for
+both the left and right inputs. IntersectOrExceptNode tries to be a little clever about picking the
+column ordering for the sorts. If the INTERSECT or EXCEPT output must be ordered then
+IntersectOrExceptNode uses the ORDER BY columns as the most significant part of the sort key for its
+inputs. Any columns not in the ORDER BY list are tacked on to the least significant part of
+the sort keys of the inputs. This ensures that the output of the INTERSECT or EXCEPT will be
+properly ordered without an additional sort step.
+<p>
+The architecture of the Derby optimizer makes it difficult to do further optimizations. SelectNode
+processing requires that order by lists be pushed down to them at the start of preprocessing. If an
+input to INTERSECT or EXCEPT is a SELECT (a common case) then IntersectOrExceptNode has to decide
+whether it needs its inputs ordered before it calls the preprocess method of its inputs. That means
+that it must chose its execution plan at the start of the optimization process, not as the result of
+the optimization process.
+<p>
+Code generation is straighforward. It generates code that invokes the ResultSetFactory.getSetOpResultSet method.
+<H2><a name="Parser">Parser</a></H2>
+The parser, in java/engine/org/apache/derby/impl/sql/compile/sqlgrammar.jj, implements the EXCEPT,
+INTERSECT, and UNION operators in the queryExpression method.
+<p>
+The UNION and EXCEPT operators have the same precedence. The INTERSECT operator has higher
+precedence, so
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT <i>t2</i> UNION <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+(<i>t1</i> EXCEPT <i>t2</i>) UNION <i>t3</i>
+</BLOCKQUOTE>
+and
+<BLOCKQUOTE>
+<i>t1</i> UNION ALL <i>t2</i> UNION <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+(<i>t1</i> UNION ALL <i>t2</i>) UNION <i>t3</i>
+</BLOCKQUOTE>
+while
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT <i>t2</i> INTERSECT <i>t3</i>
+</BLOCKQUOTE>
+is equivalent to
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT (<i>t2</i> INTERSECT <i>t3</i>)
+</BLOCKQUOTE>
+<p>
+Note that the EXCEPT operator is not associative, nor is UNION ALL
+associative with UNION, so the correct associativity is important, even when the query expression
+uses the same operator.
+<BLOCKQUOTE>
+<i>t1</i> EXCEPT (<i>t2</i> EXCEPT <i>t3</i>)
+</BLOCKQUOTE>
+is <b>not</b> equivalant to
+<BLOCKQUOTE>
+(<i>t1</i> EXCEPT <i>t2</i>) EXCEPT <i>t3</i>
+</BLOCKQUOTE>
+<p>
+This precedence and associativity is implemented in the structure of queryExpression. The higher
+precedence of the INTERSECT operator is implemented by building queryExpression out of
+nonJoinQueryTerm. A nonJoinQueryTerm consists of one or more nonJoinQueryPrimaries connected by
+INTERSECT operators. A queryExpression consists of one or more nonJoinQueryTerms connected by EXCEPT
+or UNION operators.
+<p>
+Our parser is a recursive descent parser. Recursive descent parsers want recursion on the
+right. Therefore they do not handle SQL's operator associativity naturally. The natural grammar for
+queryExpression is<br>
+<pre>
+<i>queryExpression</i> ::=
+  <i>nonJoinQueryTerm</i> |
+  <i>queryExpression</i> <b>UNION</b> [<b>ALL</b>] <i>nonJoinQueryTerm</i> |
+  <i>queryExpression</i> <b>EXCEPT</b> [<b>ALL</b>] <i>nonJoinQueryTerm</i>
+</pre>
+This captures the correct associativity of the UNION and EXCEPT operators, but we cannot use it
+because it has recursion on the left. Therefore our parser uses the following grammar:
+<pre>
+<code>
+ResultSetNode
+queryExpression(ResultSetNode leftSide, int operatorType) throws StandardException :
+{
+    ResultSetNode term;
+}
+{
+    term = nonJoinQueryTerm(leftSide, operatorType) [ term = unionOrExcept(term) ]
+    {
+        return term;
+    }
+}
+ResultSetNode
+unionOrExcept(ResultSetNode term) throws StandardException :
+{
+    ResultSetNode expression;
+    Token tok = null;
+}
+{
+    <UNION> [ tok = <ALL> ]
+        expression = queryExpression(term, (tok != null) ? UNION_ALL_OP : UNION_OP)
+    {
+        return expression;
+    }
+|
+    <EXCEPT> [ tok = <ALL> ]
+        expression = queryExpression(term, (tok != null) ? EXCEPT_ALL_OP : EXCEPT_OP)
+    {
+        return expression;
+    }
+}
+</code>
+</pre>
+We have right recursion, but each queryExpression has to know whether it is a simple expression or
+the right hand side of a UNION or INTERSECT operation. The nonJoinQueryTerm method uses a similar
+trick to implement SQL standard associativity while using right recursion. INTERSECT associativity matters
+when INTERSECT ALL is mixed with plain INTERSECT.
+<p>
+The creation of union, intersect, or except
+query tree nodes is done in the nonJoinQueryTerm method.
+
+<p>
+[<a href="papers/index.html">Back to Derby Papers</a>]
+</p>
+</html>

Propchange: incubator/derby/site/trunk/src/documentation/content/papers/Intersect-design.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml (original)
+++ incubator/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml Mon Feb 28 13:56:34 2005
@@ -20,6 +20,8 @@
 	 Overview</a></li>
         <li>Disk Page Format:<a href="pageformats.html">Derby On Disk Page Format</a></li>
         <li><a href="derby_htw.html">How Things Work</a> </li> 
+        <li>Intersect &amp; Except: <a href="Intersect-design.html">Intersect
+          &amp; Except Design</a></li>
 	<li>JDBC: <a href="JDBCImplementation.html">Derby JDBC Implementation
 		Notes</a></li>
 	<li>Log Format: <a href="logformats.html">Derby Write Ahead Log Format</a></li>
@@ -151,6 +153,6 @@
 
 </section>
 
-<p><em>Last Updated: February 11, 2005</em></p>
+<p><em>Last Updated: February 28, 2005</em></p>
 </body>
 </document>

Modified: incubator/derby/site/trunk/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewcvs/incubator/derby/site/trunk/src/documentation/content/xdocs/site.xml?view=diff&r1=155704&r2=155705
==============================================================================
--- incubator/derby/site/trunk/src/documentation/content/xdocs/site.xml (original)
+++ incubator/derby/site/trunk/src/documentation/content/xdocs/site.xml Mon Feb 28 13:56:34 2005
@@ -24,12 +24,13 @@
   </papers>
 
   <engine label="Derby Engine" href="papers/" tab="papers">
-      <index   label="Architecture"            href="derby_arch.html"/>
-      <pformat label="Disk Page Format"        href="pageformats.html"/>
-      <misc    label="How Things Work"         href="derby_htw.html"/>
-      <jdbc    label="JDBC"                    href="JDBCImplementation.html"/>
-      <log     label="Log Format"              href="logformats.html"/>
-      <recover label="Logging &amp; Recovery"  href="recovery.html"/>
+      <index     label="Architecture"           href="derby_arch.html"/>
+      <pformat   label="Disk Page Format"       href="pageformats.html"/>
+      <misc      label="How Things Work"        href="derby_htw.html"/>
+      <intersect label="Intersect &amp; Except" href="Intersect-design.html"/>
+      <jdbc      label="JDBC"                   href="JDBCImplementation.html"/>
+      <log       label="Log Format"             href="logformats.html"/>
+      <recover   label="Logging &amp; Recovery" href="recovery.html"/>
   </engine>
 
   <present label="Presentations" href="papers/" tab="papers">