You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by ka...@apache.org on 2014/06/19 11:44:16 UTC
svn commit: r1603793 - in /db/derby/code/trunk/java: engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java

Author: kahatlen
Date: Thu Jun 19 09:44:15 2014
New Revision: 1603793

URL: http://svn.apache.org/r1603793
Log:
DERBY-6227: Distinct aggregates don't work well with territory-based collation

Make the duplicate elimination use the DataValueDescriptors directly instead
of converting the values to String objects first. This ensures that the
collation rules of the database are used to compare the values, and those
rules may be different from those used by String.equals().

Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java?rev=1603793&r1=1603792&r2=1603793&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/sql/execute/GroupedAggregateResultSet.java Thu Jun 19 09:44:15 2014
@@ -121,7 +121,7 @@ class GroupedAggregateResultSet extends 
 	private boolean resultsComplete;
 	private List<ExecRow> finishedResults;
 	private ExecIndexRow[]			resultRows;
-    private List<List<Set<String>>> distinctValues;
+    private List<List<Set<DataValueDescriptor>>> distinctValues;
 
 	private boolean rollup;
 	private boolean usingAggregateObserver = false;
@@ -239,8 +239,8 @@ class GroupedAggregateResultSet extends 
 				resultRows = new ExecIndexRow[1];
 			if (aggInfoList.hasDistinct())
             {
-                distinctValues =
-                        new ArrayList<List<Set<String>>>(resultRows.length);
+                distinctValues = new ArrayList<List<Set<DataValueDescriptor>>>(
+                        resultRows.length);
             }
 			for (int r = 0; r < resultRows.length; r++)
 			{
@@ -249,8 +249,8 @@ class GroupedAggregateResultSet extends 
 				initializeVectorAggregation(resultRows[r]);
 				if (aggInfoList.hasDistinct())
                 {
-                    distinctValues.add(
-                            new ArrayList<Set<String>>(aggregates.length));
+                    distinctValues.add(new ArrayList<Set<DataValueDescriptor>>(
+                            aggregates.length));
                     initializeDistinctMaps(r, true);
                 }
 			}
@@ -762,12 +762,24 @@ class GroupedAggregateResultSet extends 
             AggregatorInfo aInfo = aggInfoList.elementAt(i);
 			if (aInfo.isDistinct())
 			{
+                if (SanityManager.DEBUG) {
+                    // Distinct aggregates currently always use the sorter.
+                    // Assert that it is so.
+                    SanityManager.ASSERT(!isInSortedOrder);
+                    SanityManager.ASSERT(scanController != null);
+
+                    // If we ever start reading directly from the source
+                    // result set, we should call source.needsToClone() to
+                    // check if we need to clone the value before adding it
+                    // to the set of distinct values. Don't clone it for now.
+                }
+
 				DataValueDescriptor newValue = currAggregate.getInputColumnValue(newRow);
 				// A NULL value is always distinct, so we only
 				// have to check for duplicate values for
 				// non-NULL values.
-                String str = newValue.getString();
-                if (str != null && !distinctValues.get(level).get(i).add(str))
+                if (!newValue.isNull()
+                        && !distinctValues.get(level).get(i).add(newValue))
 				{
                     // The value was already in the set, and we only look
                     // for distinct values. Skip this value.
@@ -792,44 +804,18 @@ class GroupedAggregateResultSet extends 
                 // Otherwise, insert null so that the list is of the right
                 // size and the indexes match those in aggregates[].
                 distinctValues.get(r).add(aInfo.isDistinct() ?
-                        new HashSet<String>() : null);
+                        new HashSet<DataValueDescriptor>() : null);
             }
 
 			if (aInfo.isDistinct())
 			{
-                Set<String> set = distinctValues.get(r).get(a);
+                Set<DataValueDescriptor> set = distinctValues.get(r).get(a);
                 set.clear();
 				DataValueDescriptor newValue =
 					aggregates[a].getInputColumnValue(resultRows[r]);
-                set.add(newValue.getString());
+                set.add(newValue);
 			}
 		}
 	}
 
-        private void dumpAllRows(int cR)
-            throws StandardException
-        {
-            System.out.println("dumpAllRows("+cR+"/"+resultRows.length+"):");
-            for (int r = 0; r < resultRows.length; r++)
-                System.out.println(dumpRow(resultRows[r]));
-        }
-	private String dumpRow(ExecRow r)
-		throws StandardException
-	{
-            if (r == null)
-                return "<NULL ROW>";
-        StringBuilder buf = new StringBuilder();
-	    int nCols = r.nColumns();
-	    for (int d = 0; d < nCols; d++)
-	    {
-		if (d > 0) buf.append(",");
-                DataValueDescriptor o = r.getColumn(d+1);
-                buf.append(o.getString());
-                if (o instanceof ExecAggregator)
-                    buf.append("[").
-                        append(((ExecAggregator)o).getResult().getString()).
-                        append("]");
-	    }
-	    return buf.toString();
-	}
 }

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java?rev=1603793&r1=1603792&r2=1603793&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/CollationTest.java Thu Jun 19 09:44:15 2014
@@ -76,6 +76,7 @@ public class CollationTest extends BaseJ
     private final static String[] ENGLISH_CASE_INSENSITIVE = {
         "testUsingClauseAndNaturalJoin",
         "testNullColumnInInsert",
+        "testDerby6227",
     };
 
     /** Test cases to run with Norwegian case-sensitive collation. */
@@ -2264,4 +2265,29 @@ public void testMissingCollatorSupport()
         assertStatementError(INVALID_ESCAPE, s,
                 "select * from d6030 where x like y escape 'aa'");
     }
+
+    /**
+     * Regression test case for DERBY-6227. Distinct grouped aggregates used
+     * to eliminate duplicates by comparing the string representation of the
+     * values using {@code String.equals()}. That does not give the right
+     * results if the database collation defines equality in a different way
+     * than {@code String.equals()}, for example when running with case
+     * insensitive collation ({@code collation=TERRITORY_BASED:PRIMARY}).
+     */
+    public void testDerby6227() throws SQLException {
+        String sql = "select i, count(distinct s) "
+                + "from (values (1, 'a'), (1, 'a'), (2, 'b'), (2, 'B'), "
+                + "(3, 'a'), (3, 'A'), (3, 'b'), (3, 'B'), (3, 'c')) v(i, s) "
+                + "group by i order by i";
+
+        // These are the expected results of the query when running with
+        // case-insensitive collation. Before the fix, the query would return
+        // (1, 1), (2, 2), (3, 5).
+        String[][] expectedRows = {
+            { "1", "1" }, { "2", "1" }, { "3", "3" }
+        };
+
+        Statement s = createStatement();
+        JDBC.assertFullResultSet(s.executeQuery(sql), expectedRows);
+    }
 }