You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by bp...@apache.org on 2008/10/26 16:05:13 UTC

svn commit: r708002 - in /db/derby/code/trunk/java: engine/org/apache/derby/impl/sql/compile/GroupByNode.java testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java

Author: bpendleton
Date: Sun Oct 26 08:05:13 2008
New Revision: 708002

URL: http://svn.apache.org/viewvc?rev=708002&view=rev
Log:
DERBY-3904: NPE on left join with aggregate

The issue involves a very special optimization that is performed
for MIN and MAX queries in which we may be able to use an index
to go directly to the lowest/highest value of the desired column.
For example, in the query

  SELECT MAX(d1) FROM t1

if there is an index on d1, we can use that index to retrieve
the max value very rapidly. In order to incorporate this
optimization, the following conditions must be met:

 - No group by
 - One of:
   - min/max(ColumnReference) is only aggregate && source is
     ordered on the ColumnReference
   - min/max(ConstantNode)
The optimization of the other way around (min with desc index or
max with asc index) has the same restrictions with the additional
temporary restriction of no qualifications at all (because
we don't have true backward scans). 

The source of the data must also be "simple" (not a result of a join),
and the NullPointerException occurred during the code that tried
to establish the above conditions because it wasn't thoroughly
enough excluding the join case. In the query:

  SELECT MAX( T1.D1 ) AS D FROM T1 LEFT JOIN T2 ON T1.D1 = T2.D2
  WHERE T2.D2 IS NULL

the code in GroupByNode.considerPostOptimizeOptimizations was
trying to traverse the AccessPathImpl to find the index scan
information, but for this LEFT JOIN case there is an AccessPathImpl
but no index scan information, because there is a join being
performed, not an index scan.

The solution is to examine the AccessPathImpl more carefully,
and only search the index scan information if an index scan
is actually present.

Also added a few tests, including an enhancement to the test
library's RuntimeStatisticsParser so that it can determine
if a Last Key Index Scan is being performed by the query.


Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/compile/GroupByNode.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/compile/GroupByNode.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/compile/GroupByNode.java?rev=708002&r1=708001&r2=708002&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/compile/GroupByNode.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/compile/GroupByNode.java Sun Oct 26 08:05:13 2008
@@ -1186,10 +1186,13 @@
 							int colNum = crs[0].getColumnNumber();
 							
 							/* Check if we have an access path, this will be
-							 * null in a join case (See Beetle 4423)
+							 * null in a join case (See Beetle 4423,DERBY-3904)
 							 */
 							AccessPath accessPath= getTrulyTheBestAccessPath();
-							if (accessPath == null)
+							if (accessPath == null ||
+								accessPath.getConglomerateDescriptor()==null||
+								accessPath.getConglomerateDescriptor().
+								           getIndexDescriptor() == null)
 								return;
 							IndexDescriptor id = accessPath.
 												getConglomerateDescriptor().

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java?rev=708002&r1=708001&r2=708002&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java Sun Oct 26 08:05:13 2008
@@ -41,6 +41,8 @@
 import org.apache.derbyTesting.junit.BaseJDBCTestCase;
 import org.apache.derbyTesting.junit.CleanDatabaseTestSetup;
 import org.apache.derbyTesting.junit.JDBC;
+import org.apache.derbyTesting.junit.RuntimeStatisticsParser;
+import org.apache.derbyTesting.junit.SQLUtilities;
 
 /**
  * Many of these test cases were converted from the old groupBy.sql
@@ -106,6 +108,16 @@
 
         st.execute("create table d3219 (a varchar(10), b varchar(1000))");
 
+		st.execute("CREATE TABLE d3904_T1( " +
+				"D1 DATE NOT NULL PRIMARY KEY, N1 VARCHAR( 10 ))");
+		st.execute("CREATE TABLE d3904_T2( " +
+				"D2 DATE NOT NULL PRIMARY KEY, N2 VARCHAR( 10 ))");
+		st.execute("INSERT INTO d3904_T1 VALUES "+
+				"( DATE( '2008-10-01' ), 'something' ), "+
+				"( DATE( '2008-10-02' ), 'something' )" );
+		st.execute("INSERT INTO d3904_T2 VALUES" +
+				"( DATE( '2008-10-01' ), 'something' )" ); 
+
         st.executeUpdate("create table d2457_o (name varchar(20), ord int)");
         st.executeUpdate("create table d2457_a (ord int, amount int)");
         st.executeUpdate("insert into d2457_o values ('John', 1)," +
@@ -2118,5 +2130,53 @@
         }
         return rows;
     }
+
+    /**
+      * DERBY-3904: Min/Max optimization needs to be aware of joins.
+      */
+    public void testDerby3904MinMaxOptimization() throws SQLException
+    {
+        Statement s = createStatement();
+
+        JDBC.assertFullResultSet(
+                s.executeQuery("SELECT d3904_T1.D1 " +
+					"FROM d3904_T1 LEFT JOIN d3904_T2 " +
+				    "ON d3904_T1.D1 = d3904_T2.D2 " +
+					"WHERE d3904_T2.D2 IS NULL"), 
+            new String[][] {  {"2008-10-02"} } );
+        JDBC.assertFullResultSet(
+                s.executeQuery("SELECT MAX( d3904_T1.D1 ) as D " +
+					"FROM d3904_T1 WHERE d3904_T1.D1 NOT IN " +
+					"( SELECT d3904_T2.D2 FROM d3904_T2 )"), 
+            new String[][] {  {"2008-10-02"} } );
+		//
+		// In DERBY-3904, this next query fails with a null pointer
+		// exception because GroupByNode doesn't realize that there
+		// is a join involved here
+		//
+        JDBC.assertFullResultSet(
+                s.executeQuery("SELECT MAX( d3904_T1.D1 ) AS D " +
+					"FROM d3904_T1 LEFT JOIN d3904_T2 " +
+					"ON d3904_T1.D1 = d3904_T2.D2 " +
+					"WHERE d3904_T2.D2 IS NULL"),
+            new String[][] {  {"2008-10-02"} } );
+
+		// Verify that the min/max optimization still works for the
+		// simple query SELECT MAX(D1) FROM T1:
+		s.execute("call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(1)");
+        JDBC.assertFullResultSet(
+                s.executeQuery("SELECT MAX(D1) FROM D3904_T1"),
+            new String[][] {  {"2008-10-02"} } );
+		RuntimeStatisticsParser rtsp =
+			SQLUtilities.getRuntimeStatisticsParser(s);
+		assertTrue(rtsp.usedLastKeyIndexScan());
+		assertFalse(rtsp.usedIndexRowToBaseRow());
+
+		// A form of the Beetle 4423 query:
+        JDBC.assertFullResultSet(
+                s.executeQuery("SELECT MAX(D1) " +
+					"FROM d3904_T1, D3904_T2 WHERE d3904_T1.D1='2008-10-02'"),
+            new String[][] {  {"2008-10-02"} } );
+	}
 }
 

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java?rev=708002&r1=708001&r2=708002&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/junit/RuntimeStatisticsParser.java Sun Oct 26 08:05:13 2008
@@ -31,6 +31,7 @@
     private boolean tableScan = false;
     private final boolean indexScan;
     private final boolean indexRowToBaseRow;
+	private final boolean lastKeyIndexScan;
     private String statistics = "";
     private boolean scrollInsensitive = false;
     private final HashSet qualifiers;
@@ -65,6 +66,7 @@
         indexScan = (rts.indexOf("Index Scan ResultSet") >= 0);
         indexRowToBaseRow =
             (rts.indexOf("Index Row to Base Row ResultSet") >= 0);
+        lastKeyIndexScan = (rts.indexOf("Last Key Index Scan ResultSet") >= 0);
         
         if (rts.indexOf("Eliminate duplicates = true") > 0) {
         	eliminatedDuplicates = true;
@@ -203,6 +205,15 @@
     }
 
     /**
+     * Return whether or not a last key index scan result set was used
+	 * in the query. A last key index scan is a special optimization for
+	 * MIN and MAX queries against an indexed column (SELECT MAX(ID) FROM T).
+     */
+    public boolean usedLastKeyIndexScan() {
+        return lastKeyIndexScan;
+    }
+
+    /**
      * Return whether or not an index row to base row result set was used in
      * the query.
      */