You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by bp...@apache.org on 2012/02/22 07:05:56 UTC

svn commit: r1292134 - in /db/derby/code/trunk/java: engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java

Author: bpendleton
Date: Wed Feb 22 06:05:55 2012
New Revision: 1292134

URL: http://svn.apache.org/viewvc?rev=1292134&view=rev
Log:
DERBY-5584: distinct grouped aggregates can return wrong results

This change addresses a problem that can arise when a GroupedAggregateResultSet
that contains a distinct aggregate is processed multiple times in the
same query execution.

The problem involves this section of GroupedAggregateResultSet:

                        /*
                        ** If there was a distinct aggregate, then that column
                        ** was automatically included as the last column in
                        ** the sort ordering. But we don't want it to be part
                        ** of the ordering anymore, because we aren't grouping
                        ** by that column, we just sorted it so that distinct
                        ** aggregation would see the values in order.
                        */

The solution that was implemented in GroupedAggregateResultSet prior to this
change was assuming that the result set was only opened and read once; it
physically removed the last column from the ordering columns as a side effect
of processing the result set.

However, during a query plan such as this cartesian product, the GROUP BY
subquery is created, then opened/read/closed, opened/read/closed, etc.,
once per row of the other side of the cartesian product.

In that case, we can't physically remove the last column each time, because
then the second and subsequent times that we read the result set, we are
sorting on the wrong columns and we produce the wrong results.

The solution is to have a better way of handling that extra invisible column,
so that we can consider it sometimes, and ignore it other times,
without doing something as destructive as physically removing it from the
ordering array, which is what we do now. 

Note that Derby has had a limitation that there be at most one DISTINCT
aggregate in a query for a long time, probably ever since it was written.
See, for example, this link from the 10.2 docs:
http://db.apache.org/derby/docs/10.2/ref/rrefsqlj32693.html

        Only one DISTINCT aggregate expression per SelectExpression is allowed.
        For example, the following query is not valid:

            SELECT AVG (DISTINCT flying_time), SUM (DISTINCT miles) FROM Flights

The GroupedAggregateResultSet implementation is aware of this limit, and knows
that there is at most one distinct aggregate in the result set. This change
neither increases that dependency nor lessens it; I'm just noting it here
for the record.



Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java?rev=1292134&r1=1292133&r2=1292134&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java Wed Feb 22 06:05:55 2012
@@ -88,6 +88,7 @@ class GroupedAggregateResultSet extends 
 	private ExecIndexRow sortTemplateRow;
 	public	boolean	hasDistinctAggregate;	// true if distinct aggregate
 	public	boolean isInSortedOrder;				// true if source results in sorted order
+	private	int numDistinctAggs = 0;
 	private int maxRowSize;
 
 	// set in open and not modified thereafter
@@ -237,7 +238,7 @@ class GroupedAggregateResultSet extends 
 		else if (!resultsComplete)
 		{
 			if (rollup)
-				resultRows = new ExecIndexRow[order.length+1];
+				resultRows = new ExecIndexRow[numGCols()+1];
 			else
 				resultRows = new ExecIndexRow[1];
 			if (aggInfoList.hasDistinct())
@@ -324,33 +325,26 @@ class GroupedAggregateResultSet extends 
 			** by that column, we just sorted it so that distinct
 			** aggregation would see the values in order.
 			*/
-			int numDistinctAggs = 0;
-			for (int i = 0; i < aggregates.length; i++)
-			{
-				AggregatorInfo aInfo = (AggregatorInfo)
-					aggInfoList.elementAt(i);
-				if (aInfo.isDistinct())
-					numDistinctAggs++;
-			}
 			// Although it seems like N aggs could have been
 			// added at the end, in fact only one has been
 			// FIXME -- need to get GroupByNode to handle this
 			// correctly, but that requires understanding
 			// scalar distinct aggregates.
 			numDistinctAggs = 1;
-			if (order.length > numDistinctAggs)
-			{
-				ColumnOrdering[] newOrder = new ColumnOrdering[
-					order.length - numDistinctAggs];
-				System.arraycopy(order, 0, newOrder, 0,
-					order.length-numDistinctAggs);
-				order = newOrder;
-			}
 		}
 		return tc.openSortScan(genericSortId,
 			activation.getResultSetHoldability());
 	}
 
+	/**
+	 * Return the number of grouping columns.
+	 *
+	 * Since some additional sort columns may have been included
+	 * in the sort for DISTINCT aggregates, this function is
+	 * used to ignore those columns when computing the grouped
+	 * results.
+	 */
+	private int numGCols() { return order.length - numDistinctAggs; }
 
 	/**
 	 * Return the next row.  
@@ -409,7 +403,7 @@ class GroupedAggregateResultSet extends 
 			{
 				boolean sameGroup = (rollup ?
 				    r <= distinguisherCol :
-				    distinguisherCol == order.length);
+				    distinguisherCol == numGCols());
 				if (sameGroup)
 				{
 					/* Same group - initialize the new
@@ -486,7 +480,7 @@ class GroupedAggregateResultSet extends 
 	private int sameGroupingValues(ExecRow currRow, ExecRow newRow)
 		throws StandardException
 	{
-		for (int index = 0; index < order.length; index++)
+		for (int index = 0; index < numGCols(); index++)
 		{
 			DataValueDescriptor currOrderable = currRow.getColumn(order[index].getColumnId() + 1);
 			DataValueDescriptor newOrderable = newRow.getColumn(order[index].getColumnId() + 1);
@@ -495,7 +489,7 @@ class GroupedAggregateResultSet extends 
 				return index;
 			}
 		}
-		return order.length;
+		return numGCols();
 	}
 
 	/**
@@ -650,7 +644,7 @@ class GroupedAggregateResultSet extends 
 		int numRolledUpCols = resultRows.length - resultNum - 1;
 		for (int i = 0; i < numRolledUpCols; i++)
 		{
-			int rolledUpColIdx = order.length - 1 - i;
+			int rolledUpColIdx = numGCols() - 1 - i;
 			DataValueDescriptor rolledUpColumn =
 				row.getColumn(order[rolledUpColIdx].getColumnId() + 1);
 			rolledUpColumn.setToNull();

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java?rev=1292134&r1=1292133&r2=1292134&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/GroupByTest.java Wed Feb 22 06:05:55 2012
@@ -2611,4 +2611,262 @@ public class GroupByTest extends BaseJDB
 
             rollback();
     }
+
+
+    /**
+     * DISTINCT aggregates in result sets which are opened multiple times.
+     * DERBY-5584.
+     * @throws SQLException
+     */
+    public void testDerby5584()
+	throws SQLException
+    {
+        setAutoCommit(false);
+        Statement s = createStatement();
+        ResultSet rs;
+
+        s.executeUpdate(
+		"CREATE TABLE TEST_5 (" +
+		"       profile_id INTEGER NOT NULL," +
+		"       group_ref INTEGER NOT NULL," +
+		"       matched_count INTEGER NOT NULL )"); 
+
+        s.executeUpdate(
+		"CREATE TABLE TEST_6 ( " +
+		"       profile_id INTEGER NOT NULL, " +
+		"       group_ref INTEGER NOT NULL, " +
+		"       matched_count INTEGER NOT NULL )"); 
+
+        s.executeUpdate( "insert into test_5 values (1, 10000, 1)" ); 
+        s.executeUpdate( "insert into test_5 values (2, 10000, 2)" ); 
+
+        s.executeUpdate( "insert into test_6 values (1, 10000, 1)" ); 
+        s.executeUpdate( "insert into test_6 values (2, 10000, 2)" );
+
+        rs = s.executeQuery( "SELECT ps1.group_ref," +
+		"COUNT(DISTINCT ps1.matched_count) AS matched_count" +
+		" FROM test_5 ps1 " +
+		" GROUP BY ps1.group_ref, ps1.profile_id" );
+        JDBC.assertFullResultSet(rs, new String[][] {
+                {"10000", "1"},
+                {"10000", "1"}
+	});
+
+        rs = s.executeQuery( "SELECT ps1.group_ref," +
+		"COUNT(ps1.matched_count) AS matched_count" +
+		" FROM test_5 ps1 " +
+		" GROUP BY ps1.group_ref, ps1.profile_id" );
+        JDBC.assertFullResultSet(rs, new String[][] {
+                {"10000", "1"},
+                {"10000", "1"}
+	});
+
+	String cartProdWithDISTINCTsubqueries = " SELECT *" +
+		" FROM " +
+		" (SELECT ps1.group_ref, ps1.profile_id, " +
+		"         COUNT(DISTINCT ps1.matched_count) AS matched_count " +
+		"  FROM test_5 ps1" +
+		"  GROUP BY ps1.group_ref, ps1.profile_id " +
+		" ) a, " +
+		" (SELECT ps2.group_ref, ps2.profile_id, " +
+		"         COUNT( DISTINCT ps2.matched_count) AS matched_count" +
+		"  FROM test_6 ps2" +
+		"  GROUP BY ps2.group_ref, ps2.profile_id " +
+		") b ";
+
+	String cartProdWithSubqueries = " SELECT * " +
+		" FROM " +
+		" (SELECT ps1.group_ref, ps1.profile_id, " +
+		"         COUNT(ps1.matched_count) AS matched_count " +
+		"  FROM test_5 ps1 " +
+		"  GROUP BY ps1.group_ref, ps1.profile_id " +
+		") a, " +
+		" (SELECT ps2.group_ref, ps2.profile_id, " +
+		"         COUNT( ps2.matched_count) AS matched_count " +
+		"  FROM test_6 ps2 " +
+		"  GROUP BY ps2.group_ref, ps2.profile_id " +
+		") b ";
+
+	String cartProdWithOrderBySubqueries = "SELECT * " +
+		" FROM " +
+		" (SELECT ps1.group_ref, ps1.profile_id " +
+		"  FROM test_5 ps1 ORDER BY profile_id fetch first 3 rows only) a, " +
+		" (SELECT ps2.group_ref, ps2.profile_id " +
+		"  FROM test_6 ps2 ORDER BY PROFILE_ID fetch first 2 rows only) b "; 
+
+
+	rs = s.executeQuery( cartProdWithDISTINCTsubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithSubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"}
+	});
+
+        s.executeUpdate( "insert into test_5 values (3, 10000, 3)" ); 
+
+	rs = s.executeQuery( cartProdWithDISTINCTsubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithSubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "1"}
+	});
+
+        s.executeUpdate( "insert into test_5 values (4, 10000, 4) "); 
+        s.executeUpdate( "insert into test_6 values (3, 10000, 3) "); 
+
+	// NOTE: At this point,
+	//   test_5 contains:		test_6 contains:
+	//	1, 10000, 1			1, 10000, 1
+	//	2, 10000, 2			2, 10000, 2
+	//	3, 10000, 3			3, 10000, 3
+	//	4, 10000, 4
+
+	rs = s.executeQuery( cartProdWithDISTINCTsubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "1"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "1"},
+		{"10000", "4", "1", "10000", "2", "1"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithSubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "1"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "1"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "1"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "1"},
+		{"10000", "4", "1", "10000", "2", "1"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+        s.executeUpdate( "insert into test_6 values (2, 10000, 1) "); 
+
+	rs = s.executeQuery( cartProdWithDISTINCTsubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "2"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "2"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "2"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "1"},
+		{"10000", "4", "1", "10000", "2", "2"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithSubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "2"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "2"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "2"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "1"},
+		{"10000", "4", "1", "10000", "2", "2"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+	// Now introduce some duplicate values so that the DISTINCT
+	// aggregates have some work to do
+
+
+        s.executeUpdate( "insert into test_6 values (1, 10000, 1) "); 
+        s.executeUpdate( "insert into test_6 values (2, 10000, 2) "); 
+
+	// NOTE: At this point,
+	//   test_5 contains:		test_6 contains:
+	//	1, 10000, 1			1, 10000, 1 (2 vals, 1 distinct)
+	//	2, 10000, 2			1, 10000, 1 
+	//	3, 10000, 3			2, 10000, 1 (3 vals, 2 distinct)
+	//	4, 10000, 4			2, 10000, 2
+	//	 				2, 10000, 2
+	//	 				3, 10000, 2 (1 val, 1 distinct)
+
+	rs = s.executeQuery( cartProdWithDISTINCTsubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "1"},
+		{"10000", "1", "1", "10000", "2", "2"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "1"},
+		{"10000", "2", "1", "10000", "2", "2"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "1"},
+		{"10000", "3", "1", "10000", "2", "2"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "1"},
+		{"10000", "4", "1", "10000", "2", "2"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithSubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "1", "10000", "1", "2"},
+		{"10000", "1", "1", "10000", "2", "3"},
+		{"10000", "1", "1", "10000", "3", "1"},
+		{"10000", "2", "1", "10000", "1", "2"},
+		{"10000", "2", "1", "10000", "2", "3"},
+		{"10000", "2", "1", "10000", "3", "1"},
+		{"10000", "3", "1", "10000", "1", "2"},
+		{"10000", "3", "1", "10000", "2", "3"},
+		{"10000", "3", "1", "10000", "3", "1"},
+		{"10000", "4", "1", "10000", "1", "2"},
+		{"10000", "4", "1", "10000", "2", "3"},
+		{"10000", "4", "1", "10000", "3", "1"}
+	});
+
+	rs = s.executeQuery( cartProdWithOrderBySubqueries );
+        JDBC.assertFullResultSet(rs, new String[][] {
+		{"10000", "1", "10000", "1"},
+		{"10000", "1", "10000", "1"},
+		{"10000", "2", "10000", "1"},
+		{"10000", "2", "10000", "1"},
+		{"10000", "3", "10000", "1"},
+		{"10000", "3", "10000", "1"}
+	});
+
+            rollback();
+    }
 }