You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2018/01/30 16:00:38 UTC

[1/3] trafodion git commit: [TRAFODION-2912] Better handling of non-deterministic scalar UDFs

Repository: trafodion
Updated Branches:
  refs/heads/master 4ef8055f0 -> 6d38e5809


[TRAFODION-2912] Better handling of non-deterministic scalar UDFs

Fix some issues found by Andy Yang and others while writing a
non-deterministic scalar UDF (a random generator in this case).

This UDF was transformed into a hash join, which executes the UDF
only once and not once per row. Another problem is the probe cache,
which can also lead to a single execution instead of once per row.

The fix records the non-deterministic UDF attribute in the group
attributes and it adds checks in the normalizer to suppress the
conversion from a TSJ to a non-TSJ when non-deterministic UDFs are
present. The probe cache logic already had this check, so all that was
needed was to set the attribute.

Note that there may be some more complex queries where we still won't
execute the UDF once per row. In general, there is no absolute
guarantee that a non-deterministic scalar UDF is executed once per row
(of the cartesian product of all the tables joined??). However, in
simple cases like the added test we should try to call the UDF for
every row that satisfies the join predicates.


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/a7256545
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/a7256545
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/a7256545

Branch: refs/heads/master
Commit: a725654560fabfafc2b81568720f43a24a1b3007
Parents: dc94344
Author: Hans Zeller <hz...@apache.org>
Authored: Mon Jan 29 19:20:50 2018 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Mon Jan 29 19:20:50 2018 +0000

----------------------------------------------------------------------
 core/sql/optimizer/NormItemExpr.cpp        |   3 +-
 core/sql/optimizer/NormRelExpr.cpp         |  19 ++--
 core/sql/regress/udr/EXPECTED103           | 125 ++++++++++++++++++++++++
 core/sql/regress/udr/TEST103               |  22 +++++
 core/sql/regress/udr/TEST103_functions.cpp |  34 ++++++-
 5 files changed, 194 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/optimizer/NormItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NormItemExpr.cpp b/core/sql/optimizer/NormItemExpr.cpp
index abd213a..fb4996a 100644
--- a/core/sql/optimizer/NormItemExpr.cpp
+++ b/core/sql/optimizer/NormItemExpr.cpp
@@ -949,7 +949,8 @@ void UDFunction::transformToRelExpr(NormWA & normWARef,
 
        // The routine desc points to both the NARoutine structs.
        isUdf->setRoutineDesc(udfDesc_);
-    
+       isUdf->getGroupAttr()->setHasNonDeterministicUDRs(
+            !getRoutineDesc()->getEffectiveNARoutine()->isDeterministic());
 
        // Get the valueIds of the parameters
        isUdf->gatherParamValueIds(isUdf->getProcAllParamsTree(), 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/optimizer/NormRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NormRelExpr.cpp b/core/sql/optimizer/NormRelExpr.cpp
index 781ecc9..e53f954 100644
--- a/core/sql/optimizer/NormRelExpr.cpp
+++ b/core/sql/optimizer/NormRelExpr.cpp
@@ -1464,7 +1464,8 @@ void Join::transformNode(NormWA & normWARef,
   //   reference the left subtree data.
   // 
   // For RoutineJoins/Udfs we also want to convert it to a join if the UDF
-  // does not need any inputs from the left. 
+  // does not need any inputs from the left and if the routine is
+  // deterministic.
   // ---------------------------------------------------------------------
   if (isTSJ())
     {
@@ -1597,18 +1598,21 @@ void Join::transformNode(NormWA & normWARef,
          if (crossReferences2.isEmpty() && 
              !isTSJForWrite()           &&
              !getInliningInfo().isDrivingPipelinedActions() &&
-             !getInliningInfo().isDrivingTempInsert() )// Triggers -
+             !getInliningInfo().isDrivingTempInsert() && // Triggers -
+             !(isRoutineJoin() &&
+               child(1).getGroupAttr()->getHasNonDeterministicUDRs()))
          {
            // Remember we used to be a RoutineJoin. This is used to determine
            // what type of contexts for partitioning we will try in OptPhysRel.
-           if (isRoutineJoin()) setDerivedFromRoutineJoin();
+           if (isRoutineJoin())
+             setDerivedFromRoutineJoin();
            convertToNotTsj();
          }
     
       else
          {
            // We have a TSJ that will be changed to Nested join 
-           // safe to change NOtIn here to non equi-predicate form (NE)
+           // safe to change NotIn here to non equi-predicate form (NE)
            // at this point only the case on single column NotIn can reach here
            // and the either the outer or inner column or both is nullable
            // and may have null values
@@ -2319,17 +2323,20 @@ RelExpr * Join::normalizeNode(NormWA & normWARef)
   // and if a value that is produced by the left subtree is not
   // referenced in the right subtree, 
   // ---------------------------------------------------------------------
-  if (isATSJ AND NOT isTSJForWrite() AND //NOT isRoutineJoin() AND
+  if (isATSJ AND NOT isTSJForWrite() AND
       NOT child(1)->getGroupAttr()->
             getCharacteristicInputs().referencesOneValueFromTheSet
                (child(0)->getGroupAttr()->getCharacteristicOutputs())
       && !getInliningInfo().isDrivingPipelinedActions() 
       && !getInliningInfo().isDrivingTempInsert() // Triggers -
+      && !(isRoutineJoin() &&
+           child(1).getGroupAttr()->getHasNonDeterministicUDRs())
      )
   {
     // Remember we used to be a RoutineJoin. This is used to determine
     // what type of contexts for partitioning we will try in OptPhysRel.
-    if (isRoutineJoin()) setDerivedFromRoutineJoin();
+    if (isRoutineJoin())
+      setDerivedFromRoutineJoin();
 
     convertToNotTsj();
     // ---------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/EXPECTED103
----------------------------------------------------------------------
diff --git a/core/sql/regress/udr/EXPECTED103 b/core/sql/regress/udr/EXPECTED103
index 197e0d1..aab044c 100644
--- a/core/sql/regress/udr/EXPECTED103
+++ b/core/sql/regress/udr/EXPECTED103
@@ -209,6 +209,68 @@ CREATE LIBRARY TRAFODION.UDR103SCH.FUNCTIONSFORTEST103 FILE '/mnt2/ansharma/ansh
 
 --- SQL operation complete.
 >>
+>>create function nonDeterministicRandom
++>  () returns (r integer)
++>  language c parameter style sql external name 'nonDeterministicRandom'
++>  library functionsForTest103
++>  not deterministic no sql final call allow any parallelism state area size 1024 ;
+
+--- SQL operation complete.
+>>
+>>cqd nested_joins 'off';
+
+--- SQL operation complete.
+>>cqd merge_joins 'off';
+
+--- SQL operation complete.
+>>cqd join_order_by_user 'on';
+
+--- SQL operation complete.
+>>prepare s from
++>select a, nonDeterministicRandom() r1,
++>       generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3
++>from (values (1), (2), (3)) T(a);
+
+--- SQL command prepared.
+>>explain options 'f' s;
+
+LC   RC   OP   OPERATOR              OPT       DESCRIPTION           CARD
+---- ---- ---- --------------------  --------  --------------------  ---------
+
+8    .    9    root                                                  3.00E+000
+7    1    8    hybrid_hash_join                                      3.00E+000
+4    6    7    nested_join                                           3.00E+000
+5    .    6    probe_cache                                           1.00E+000
+.    .    5    isolated_scalar_udf             GENERATERANDOMNUMBER  1.00E+000
+2    3    4    nested_join                                           3.00E+000
+.    .    3    isolated_scalar_udf             NONDETERMINISTICRAND  1.00E+000
+.    .    2    tuplelist                                             3.00E+000
+.    .    1    isolated_scalar_udf             GENERATERANDOMNUMBER  1.00E+000
+
+--- SQL operation complete.
+>>-- r1: Nested join, no probe cache, because it's non-deterministic
+>>-- r2: Nested join, probe cache, because it refers to the outer table
+>>-- r3: Hash join, because it's deterministic, no refs to outer
+>>execute s;
+
+A       R1           R2              R3            
+------  -----------  --------------  --------------
+
+     1          555  3675            3330          
+     2          316  1985            3330          
+     3          209  6580            3330          
+
+--- 3 row(s) selected.
+>>cqd nested_joins reset;
+
+--- SQL operation complete.
+>>cqd merge_joins reset;
+
+--- SQL operation complete.
+>>cqd join_order_by_user reset;
+
+--- SQL operation complete.
+>>
 >>-- procedures
 >>create procedure updateSubscriptions(
 +>  IN operation char(20),
@@ -398,6 +460,68 @@ CREATE LIBRARY TRAFODION.UDR103SCH.FUNCTIONSFORTEST103 FILE '/mnt2/ansharma/ansh
 
 --- SQL operation complete.
 >>
+>>create function nonDeterministicRandom
++>  () returns (r integer)
++>  language c parameter style sql external name 'nonDeterministicRandom'
++>  library functionsForTest103
++>  not deterministic no sql final call allow any parallelism state area size 1024 ;
+
+--- SQL operation complete.
+>>
+>>cqd nested_joins 'off';
+
+--- SQL operation complete.
+>>cqd merge_joins 'off';
+
+--- SQL operation complete.
+>>cqd join_order_by_user 'on';
+
+--- SQL operation complete.
+>>prepare s from
++>select a, nonDeterministicRandom() r1,
++>       generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3
++>from (values (1), (2), (3)) T(a);
+
+--- SQL command prepared.
+>>explain options 'f' s;
+
+LC   RC   OP   OPERATOR              OPT       DESCRIPTION           CARD
+---- ---- ---- --------------------  --------  --------------------  ---------
+
+8    .    9    root                                                  3.00E+000
+7    1    8    hybrid_hash_join                                      3.00E+000
+4    6    7    nested_join                                           3.00E+000
+5    .    6    probe_cache                                           1.00E+000
+.    .    5    isolated_scalar_udf             GENERATERANDOMNUMBER  1.00E+000
+2    3    4    nested_join                                           3.00E+000
+.    .    3    isolated_scalar_udf             NONDETERMINISTICRAND  1.00E+000
+.    .    2    tuplelist                                             3.00E+000
+.    .    1    isolated_scalar_udf             GENERATERANDOMNUMBER  1.00E+000
+
+--- SQL operation complete.
+>>-- r1: Nested join, no probe cache, because it's non-deterministic
+>>-- r2: Nested join, probe cache, because it refers to the outer table
+>>-- r3: Hash join, because it's deterministic, no refs to outer
+>>execute s;
+
+A       R1           R2              R3            
+------  -----------  --------------  --------------
+
+     1          555  3675            3330          
+     2          316  1985            3330          
+     3          209  6580            3330          
+
+--- 3 row(s) selected.
+>>cqd nested_joins reset;
+
+--- SQL operation complete.
+>>cqd merge_joins reset;
+
+--- SQL operation complete.
+>>cqd join_order_by_user reset;
+
+--- SQL operation complete.
+>>
 >>-- procedures
 >>create procedure updateSubscriptions(
 +>  IN operation char(20),
@@ -427,6 +551,7 @@ Functions in Schema TRAFODION.UDR103SCH
 CANACCESSVIEW
 GENERATEPHONENUMBER
 GENERATERANDOMNUMBER
+NONDETERMINISTICRANDOM
 
 --- SQL operation complete.
 >>get procedures in schema udr103sch;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/TEST103
----------------------------------------------------------------------
diff --git a/core/sql/regress/udr/TEST103 b/core/sql/regress/udr/TEST103
index 4feb957..4446ff2 100644
--- a/core/sql/regress/udr/TEST103
+++ b/core/sql/regress/udr/TEST103
@@ -225,6 +225,28 @@ create function generateRandomNumber
   library functionsForTest103
   deterministic no sql final call allow any parallelism state area size 1024 ;
 
+create function nonDeterministicRandom
+  () returns (r integer)
+  language c parameter style sql external name 'nonDeterministicRandom'
+  library functionsForTest103
+  not deterministic no sql final call allow any parallelism state area size 1024 ;
+
+cqd nested_joins 'off';
+cqd merge_joins 'off';
+cqd join_order_by_user 'on';
+prepare s from
+select a, nonDeterministicRandom() r1,
+       generateRandomNumber(a, 4) r2, generateRandomNumber(123, 4) r3
+from (values (1), (2), (3)) T(a);
+explain options 'f' s;
+-- r1: Nested join, no probe cache, because it's non-deterministic
+-- r2: Nested join, probe cache, because it refers to the outer table
+-- r3: Hash join, because it's deterministic, no refs to outer
+execute s;
+cqd nested_joins reset;
+cqd merge_joins reset;
+cqd join_order_by_user reset;
+
 -- procedures
 create procedure updateSubscriptions(
   IN operation char(20),

http://git-wip-us.apache.org/repos/asf/trafodion/blob/a7256545/core/sql/regress/udr/TEST103_functions.cpp
----------------------------------------------------------------------
diff --git a/core/sql/regress/udr/TEST103_functions.cpp b/core/sql/regress/udr/TEST103_functions.cpp
index 1ee0531..b997935 100644
--- a/core/sql/regress/udr/TEST103_functions.cpp
+++ b/core/sql/regress/udr/TEST103_functions.cpp
@@ -69,7 +69,7 @@ SQLUDR_LIBFUNC SQLUDR_INT32 genPhoneNumber(SQLUDR_INT32 *in1, // seed
     }
   }
 
-  strcpy(out, result.c_str());
+  memcpy(out, result.c_str(), result.length());
   return SQLUDR_SUCCESS;
 }
 
@@ -117,11 +117,41 @@ SQLUDR_LIBFUNC SQLUDR_INT32 genRandomNumber(SQLUDR_INT32 *in1,
     }
   }
   
-  strcpy(out, result.c_str());
+  memcpy(out, result.c_str(), result.length());
   return SQLUDR_SUCCESS;
 }
 
 
+SQLUDR_LIBFUNC SQLUDR_INT32 nonDeterministicRandom(SQLUDR_INT32 *out1,
+                                                   SQLUDR_INT16 *outInd1,
+                                                   SQLUDR_TRAIL_ARGS)
+{
+  if (calltype == SQLUDR_CALLTYPE_FINAL)
+    return SQLUDR_SUCCESS;
+
+  // pointer to the buffer in the state area that is
+  // available for the lifetime of this statement,
+  // this can be used by the UDF to maintain state
+  int *my_state = (int *) statearea->stmt_data.data;
+
+  if (calltype == SQLUDR_CALLTYPE_INITIAL && *my_state == 0)
+    {
+      *my_state = 555;
+    }
+  else
+    // Use a simple linear congruential generator, we
+    // want deterministic regression results, despite
+    // the name of this function. Note that a
+    // this UDF is still "not deterministic", since it
+    // returns different results when called with the
+    // same (empty) inputs.
+    *my_state = (13 * (*my_state) + 101) % 1000;
+
+  (*out1) = *my_state;
+
+  return SQLUDR_SUCCESS;
+}
+
 SQLUDR_LIBFUNC SQLUDR_INT32 canAccessView(SQLUDR_CHAR *inZoneNeeded,
                                           SQLUDR_CHAR *inZoneHas,
                                           SQLUDR_INT32 *inPackageNeeded,


[2/3] trafodion git commit: Fix typo.

Posted by hz...@apache.org.
Fix typo.


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/81db5c01
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/81db5c01
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/81db5c01

Branch: refs/heads/master
Commit: 81db5c01c3dbec0b5a501909b6cc9e2d8b037964
Parents: a725654
Author: Hans Zeller <hz...@apache.org>
Authored: Tue Jan 30 00:48:22 2018 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Tue Jan 30 00:48:22 2018 +0000

----------------------------------------------------------------------
 core/sql/regress/udr/TEST103_functions.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/81db5c01/core/sql/regress/udr/TEST103_functions.cpp
----------------------------------------------------------------------
diff --git a/core/sql/regress/udr/TEST103_functions.cpp b/core/sql/regress/udr/TEST103_functions.cpp
index b997935..0da151c 100644
--- a/core/sql/regress/udr/TEST103_functions.cpp
+++ b/core/sql/regress/udr/TEST103_functions.cpp
@@ -139,12 +139,11 @@ SQLUDR_LIBFUNC SQLUDR_INT32 nonDeterministicRandom(SQLUDR_INT32 *out1,
       *my_state = 555;
     }
   else
-    // Use a simple linear congruential generator, we
-    // want deterministic regression results, despite
-    // the name of this function. Note that a
-    // this UDF is still "not deterministic", since it
-    // returns different results when called with the
-    // same (empty) inputs.
+    // Use a simple linear congruential generator, we want
+    // deterministic regression results, despite the name of this
+    // function. Note that this UDF is still "not deterministic",
+    // since it returns different results when called with the same
+    // (empty) inputs.
     *my_state = (13 * (*my_state) + 101) % 1000;
 
   (*out1) = *my_state;


[3/3] trafodion git commit: Merge [TRAFODION-2912] Better handling of non-deterministic scalar UDFs PR-1420

Posted by hz...@apache.org.
Merge [TRAFODION-2912] Better handling of non-deterministic scalar UDFs PR-1420


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/6d38e580
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/6d38e580
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/6d38e580

Branch: refs/heads/master
Commit: 6d38e58097d1b592983fa7e478dc839ad9509bdc
Parents: 4ef8055 81db5c0
Author: Hans Zeller <hz...@apache.org>
Authored: Tue Jan 30 15:59:53 2018 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Tue Jan 30 15:59:53 2018 +0000

----------------------------------------------------------------------
 core/sql/optimizer/NormItemExpr.cpp        |   3 +-
 core/sql/optimizer/NormRelExpr.cpp         |  19 ++--
 core/sql/regress/udr/EXPECTED103           | 125 ++++++++++++++++++++++++
 core/sql/regress/udr/TEST103               |  22 +++++
 core/sql/regress/udr/TEST103_functions.cpp |  33 ++++++-
 5 files changed, 193 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/6d38e580/core/sql/optimizer/NormRelExpr.cpp
----------------------------------------------------------------------