You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/06/15 08:14:40 UTC

[GitHub] [spark] HeartSaVioR opened a new pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

HeartSaVioR opened a new pull request #28831:
URL: https://github.com/apache/spark/pull/28831


   ### What changes were proposed in this pull request?
   
   This patch fixes the code generation logic for mixed string/array types of columns in `concat_ws` to not split methods, because splitting method would generate the code which is unable to compile. (Refer the next section `Why are the changes needed?` for details.)
   
   The change won't bring performance issue, because it's tricky to trigger method split, and due to the relation of parts it should fail to compile whenever the method split happens.
   
   Below is the generated code for newly added UT, mixed string/array type of columns.
   
   > before the patch
   
   ```
   /* 001 */ public java.lang.Object generate(Object[] references) {
   /* 002 */   return new SpecificUnsafeProjection(references);
   /* 003 */ }
   /* 004 */
   /* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
   /* 006 */
   /* 007 */   private Object[] references;
   /* 008 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
   /* 009 */
   /* 010 */   public SpecificUnsafeProjection(Object[] references) {
   /* 011 */     this.references = references;
   /* 012 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
   /* 013 */
   /* 014 */   }
   /* 015 */
   /* 016 */   public void initialize(int partitionIndex) {
   /* 017 */
   /* 018 */   }
   /* 019 */
   /* 020 */   // Scala.Function1 need this
   /* 021 */   public java.lang.Object apply(java.lang.Object row) {
   /* 022 */     return apply((InternalRow) row);
   /* 023 */   }
   /* 024 */
   /* 025 */   public UnsafeRow apply(InternalRow i) {
   /* 026 */     mutableStateArray_0[0].reset();
   /* 027 */
   /* 028 */
   /* 029 */     mutableStateArray_0[0].zeroOutNullBytes();
   /* 030 */
   /* 031 */     apply_0_0(i);
   /* 032 */     apply_0_1(i);
   /* 033 */     int varargNum_0 = 30;
   /* 034 */     int idxInVararg_0 = 0;
   /* 035 */
   /* 036 */     if (!isNull_2) {
   /* 037 */       varargNum_0 += value_2.numElements();
   /* 038 */     }
   /* 039 */
   /* 040 */     if (!isNull_3) {
   /* 041 */       varargNum_0 += value_3.numElements();
   /* 042 */     }
   /* 043 */
   /* 044 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
   /* 045 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0);
   /* 046 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0);
   /* 047 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0);
   /* 048 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
   /* 049 */     boolean isNull_0 = value_0 == null;
   /* 050 */     mutableStateArray_0[0].write(0, value_0);
   /* 051 */     return (mutableStateArray_0[0].getRow());
   /* 052 */   }
   /* 053 */
   /* 054 */
   /* 055 */   private void apply_0_1(InternalRow i) {
   /* 056 */     UTF8String value_25 = i.getUTF8String(22);UTF8String value_26 = i.getUTF8String(23);UTF8String value_27 = i.getUTF8String(24);UTF8String value_28 = i.getUTF8String(25);UTF8String value_29 = i.getUTF8String(26);UTF8String value_30 = i.getUTF8String(27);UTF8String value_31 = i.getUTF8String(28);UTF8String value_32 = i.getUTF8String(29);UTF8String value_33 = i.getUTF8String(30);
   /* 057 */   }
   /* 058 */
   /* 059 */
   /* 060 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
   /* 061 */
   /* 062 */
   /* 063 */     if (!isNull_2) {
   /* 064 */       final int n_0 = value_2.numElements();
   /* 065 */       for (int j = 0; j < n_0; j ++) {
   /* 066 */         array_0[idxInVararg_0 ++] = value_2.getUTF8String(j);
   /* 067 */       }
   /* 068 */     }
   /* 069 */
   /* 070 */     if (!isNull_3) {
   /* 071 */       final int n_1 = value_3.numElements();
   /* 072 */       for (int j = 0; j < n_1; j ++) {
   /* 073 */         array_0[idxInVararg_0 ++] = value_3.getUTF8String(j);
   /* 074 */       }
   /* 075 */     }
   /* 076 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_4;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_5;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_6;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_7;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_8;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_9;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_10;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_11;
   /* 077 */     return idxInVararg_0;
   /* 078 */
   /* 079 */   }
   /* 080 */
   /* 081 */
   /* 082 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
   /* 083 */
   /* 084 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_28;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_29;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_30;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_31;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_32;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_33;
   /* 085 */     return idxInVararg_0;
   /* 086 */
   /* 087 */   }
   /* 088 */
   /* 089 */
   /* 090 */   private void apply_0_0(InternalRow i) {
   /* 091 */     boolean isNull_2 = i.isNullAt(31);
   /* 092 */     ArrayData value_2 = isNull_2 ?
   /* 093 */     null : (i.getArray(31));boolean isNull_3 = i.isNullAt(32);
   /* 094 */     ArrayData value_3 = isNull_3 ?
   /* 095 */     null : (i.getArray(32));UTF8String value_4 = i.getUTF8String(1);UTF8String value_5 = i.getUTF8String(2);UTF8String value_6 = i.getUTF8String(3);UTF8String value_7 = i.getUTF8String(4);UTF8String value_8 = i.getUTF8String(5);UTF8String value_9 = i.getUTF8String(6);UTF8String value_10 = i.getUTF8String(7);UTF8String value_11 = i.getUTF8String(8);UTF8String value_12 = i.getUTF8String(9);UTF8String value_13 = i.getUTF8String(10);UTF8String value_14 = i.getUTF8String(11);UTF8String value_15 = i.getUTF8String(12);UTF8String value_16 = i.getUTF8String(13);UTF8String value_17 = i.getUTF8String(14);UTF8String value_18 = i.getUTF8String(15);UTF8String value_19 = i.getUTF8String(16);UTF8String value_20 = i.getUTF8String(17);UTF8String value_21 = i.getUTF8String(18);UTF8String value_22 = i.getUTF8String(19);UTF8String value_23 = i.getUTF8String(20);UTF8String value_24 = i.getUTF8String(21);
   /* 096 */   }
   /* 097 */
   /* 098 */
   /* 099 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
   /* 100 */
   /* 101 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_12;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_13;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_14;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_15;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_16;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_17;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_18;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_19;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_20;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_21;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_22;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_23;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_24;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_25;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_26;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_27;
   /* 102 */     return idxInVararg_0;
   /* 103 */
   /* 104 */   }
   /* 105 */
   /* 106 */ }
   ```
   
   Compilation of the generated code fails with error message: `org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 36, Column 6: Expression "isNull_2" is not an rvalue`
   
   > after the patch
   
   ```
   /* 001 */ public java.lang.Object generate(Object[] references) {
   /* 002 */   return new SpecificUnsafeProjection(references);
   /* 003 */ }
   /* 004 */
   /* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
   /* 006 */
   /* 007 */   private Object[] references;
   /* 008 */   private boolean globalIsNull_0;
   /* 009 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
   /* 010 */
   /* 011 */   public SpecificUnsafeProjection(Object[] references) {
   /* 012 */     this.references = references;
   /* 013 */
   /* 014 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
   /* 015 */
   /* 016 */   }
   /* 017 */
   /* 018 */   public void initialize(int partitionIndex) {
   /* 019 */
   /* 020 */   }
   /* 021 */
   /* 022 */   // Scala.Function1 need this
   /* 023 */   public java.lang.Object apply(java.lang.Object row) {
   /* 024 */     return apply((InternalRow) row);
   /* 025 */   }
   /* 026 */
   /* 027 */   public UnsafeRow apply(InternalRow i) {
   /* 028 */     mutableStateArray_0[0].reset();
   /* 029 */
   /* 030 */
   /* 031 */     mutableStateArray_0[0].zeroOutNullBytes();
   /* 032 */
   /* 033 */     UTF8String value_34 = ConcatWs_0(i);
   /* 034 */     mutableStateArray_0[0].write(0, value_34);
   /* 035 */     return (mutableStateArray_0[0].getRow());
   /* 036 */   }
   /* 037 */
   /* 038 */
   /* 039 */   private UTF8String ConcatWs_0(InternalRow i) {
   /* 040 */     boolean isNull_2 = i.isNullAt(31);
   /* 041 */     ArrayData value_2 = isNull_2 ?
   /* 042 */     null : (i.getArray(31));
   /* 043 */     boolean isNull_3 = i.isNullAt(32);
   /* 044 */     ArrayData value_3 = isNull_3 ?
   /* 045 */     null : (i.getArray(32));
   /* 046 */     UTF8String value_4 = i.getUTF8String(1);
   /* 047 */     UTF8String value_5 = i.getUTF8String(2);
   /* 048 */     UTF8String value_6 = i.getUTF8String(3);
   /* 049 */     UTF8String value_7 = i.getUTF8String(4);
   /* 050 */     UTF8String value_8 = i.getUTF8String(5);
   /* 051 */     UTF8String value_9 = i.getUTF8String(6);
   /* 052 */     UTF8String value_10 = i.getUTF8String(7);
   /* 053 */     UTF8String value_11 = i.getUTF8String(8);
   /* 054 */     UTF8String value_12 = i.getUTF8String(9);
   /* 055 */     UTF8String value_13 = i.getUTF8String(10);
   /* 056 */     UTF8String value_14 = i.getUTF8String(11);
   /* 057 */     UTF8String value_15 = i.getUTF8String(12);
   /* 058 */     UTF8String value_16 = i.getUTF8String(13);
   /* 059 */     UTF8String value_17 = i.getUTF8String(14);
   /* 060 */     UTF8String value_18 = i.getUTF8String(15);
   /* 061 */     UTF8String value_19 = i.getUTF8String(16);
   /* 062 */     UTF8String value_20 = i.getUTF8String(17);
   /* 063 */     UTF8String value_21 = i.getUTF8String(18);
   /* 064 */     UTF8String value_22 = i.getUTF8String(19);
   /* 065 */     UTF8String value_23 = i.getUTF8String(20);
   /* 066 */     UTF8String value_24 = i.getUTF8String(21);
   /* 067 */     UTF8String value_25 = i.getUTF8String(22);
   /* 068 */     UTF8String value_26 = i.getUTF8String(23);
   /* 069 */     UTF8String value_27 = i.getUTF8String(24);
   /* 070 */     UTF8String value_28 = i.getUTF8String(25);
   /* 071 */     UTF8String value_29 = i.getUTF8String(26);
   /* 072 */     UTF8String value_30 = i.getUTF8String(27);
   /* 073 */     UTF8String value_31 = i.getUTF8String(28);
   /* 074 */     UTF8String value_32 = i.getUTF8String(29);
   /* 075 */     UTF8String value_33 = i.getUTF8String(30);
   /* 076 */     int varargNum_0 = 30;
   /* 077 */     int idxInVararg_0 = 0;
   /* 078 */
   /* 079 */     if (!isNull_2) {
   /* 080 */       varargNum_0 += value_2.numElements();
   /* 081 */     }
   /* 082 */
   /* 083 */
   /* 084 */     if (!isNull_3) {
   /* 085 */       varargNum_0 += value_3.numElements();
   /* 086 */     }
   /* 087 */
   /* 088 */
   /* 089 */
   /* 090 */
   /* 091 */
   /* 092 */
   /* 093 */
   /* 094 */
   /* 095 */
   /* 096 */
   /* 097 */
   /* 098 */
   /* 099 */
   /* 100 */
   /* 101 */
   /* 102 */
   /* 103 */
   /* 104 */
   /* 105 */
   /* 106 */
   /* 107 */
   /* 108 */
   /* 109 */
   /* 110 */
   /* 111 */
   /* 112 */
   /* 113 */
   /* 114 */
   /* 115 */
   /* 116 */
   /* 117 */
   /* 118 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
   /* 119 */
   /* 120 */     if (!isNull_2) {
   /* 121 */       final int n_0 = value_2.numElements();
   /* 122 */       for (int j = 0; j < n_0; j ++) {
   /* 123 */         array_0[idxInVararg_0 ++] = value_2.getUTF8String(j);
   /* 124 */       }
   /* 125 */     }
   /* 126 */
   /* 127 */
   /* 128 */     if (!isNull_3) {
   /* 129 */       final int n_1 = value_3.numElements();
   /* 130 */       for (int j = 0; j < n_1; j ++) {
   /* 131 */         array_0[idxInVararg_0 ++] = value_3.getUTF8String(j);
   /* 132 */       }
   /* 133 */     }
   /* 134 */
   /* 135 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_4;
   /* 136 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_5;
   /* 137 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_6;
   /* 138 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_7;
   /* 139 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_8;
   /* 140 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_9;
   /* 141 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_10;
   /* 142 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_11;
   /* 143 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_12;
   /* 144 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_13;
   /* 145 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_14;
   /* 146 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_15;
   /* 147 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_16;
   /* 148 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_17;
   /* 149 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_18;
   /* 150 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_19;
   /* 151 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_20;
   /* 152 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_21;
   /* 153 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_22;
   /* 154 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_23;
   /* 155 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_24;
   /* 156 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_25;
   /* 157 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_26;
   /* 158 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_27;
   /* 159 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_28;
   /* 160 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_29;
   /* 161 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_30;
   /* 162 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_31;
   /* 163 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_32;
   /* 164 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_33;
   /* 165 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
   /* 166 */     boolean isNull_0 = value_0 == null;
   /* 167 */     globalIsNull_0 = isNull_0;
   /* 168 */     return value_0;
   /* 169 */   }
   /* 170 */
   /* 171 */ }
   ```
   
   ### Why are the changes needed?
   
   The generated code in `concat_ws` fails to compile when the below conditions are met:
   
   * Plenty of columns are provided as input of `concat_ws`.
   * Above columns are mixed up with string type and array[string] type.
   * Splitting methods is triggered in `splitExpressionsWithCurrentInputs`.
     * This is a bit tricky, as the method won't split methods under whole stage codegen, as well as it will be simply no-op (inlined) if the number of blocks to convert into methods is 1.
   
   https://github.com/apache/spark/blob/a0187cd6b59a6b6bb2cadc6711bb663d4d35a844/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala#L88-L195
   
   There're three parts of generated code in `concat_ws` (`codes`, `varargCounts`, `varargBuilds`) and all parts try to split method by itself, while `varargCounts` and `varargBuilds` refer on the generated code in `codes`, hence the overall generated code fails to compile if any of part succeeds to split.
   
   ### Does this PR introduce _any_ user-facing change?
   
   No.
   
   ### How was this patch tested?
   
   New UTs added. (One for verification of the patch, another one for regression test)


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442563872



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       If I understand correctly, evals.head is delimiter, and it's used in `UTF8String.concatWs`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646366637


   **[Test build #124242 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124242/testReport)** for PR 28831 at commit [`e8f972d`](https://github.com/apache/spark/commit/e8f972db9baf05778339d01335e1422155672017).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] viirya commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
viirya commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442564718



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -118,47 +118,72 @@ case class ConcatWs(children: Seq[Expression])
         boolean ${ev.isNull} = ${ev.value} == null;
       """)
     } else {
+      val isNullArgs = ctx.freshName("isNullArgs")
+      val valueArgs = ctx.freshName("valueArgs")
+
       val array = ctx.freshName("array")
       val varargNum = ctx.freshName("varargNum")
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
-      val (varargCount, varargBuild) = children.tail.zip(evals.tail).map { case (child, eval) =>
-        child.dataType match {
+      val (argBuild, varargCount, varargBuild) = children.tail.zip(evals.tail)
+        .zipWithIndex.map { case ((child, eval), idx) =>
+        val reprForIsNull = s"$isNullArgs[$idx]"
+        val reprForValue = s"$valueArgs[$idx]"
+
+        val arg =
+          s"""
+           ${eval.code}
+           $reprForIsNull = ${eval.isNull};
+           $reprForValue = ${eval.value};
+           """
+
+        val (varCount, varBuild) = child.dataType match {
           case StringType =>
+            val reprForValueCast = s"((UTF8String) $valueArgs[$idx])"
             ("", // we count all the StringType arguments num at once below.
-             if (eval.isNull == TrueLiteral) {
-               ""
-             } else {
-               s"$array[$idxVararg ++] = ${eval.isNull} ? (UTF8String) null : ${eval.value};"
-             })
+              if (eval.isNull == TrueLiteral) {
+                ""
+              } else {
+                s"$array[$idxVararg ++] = $reprForIsNull ? (UTF8String) null : $reprForValueCast;"
+              })
           case _: ArrayType =>
+            val reprForValueCast = s"((ArrayData) $valueArgs[$idx])"

Review comment:
       s"((ArrayData) $reprForValue)"?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645724402


   I think the paths are `all string` vs `string/array[string]`, so it falls into latter case. Not sure how to make it shorten. Would `columns having any of array type` be clearer?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646445485






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345529


   Test FAILed.
   Refer to this link for build results (access rights to CI server needed): 
   https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/124238/
   Test FAILed.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644026181


   Merged build finished. Test FAILed.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] cloud-fan commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
cloud-fan commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646453830


   A general suggestion: We don't need to put the entire generated code in the PR description. We should just put the most important pieces, and use `...` to indicate other non-related code.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644538409


   cc. @kiszk @viirya as initial reviews as I can see they're contributed the code part.
   cc. @bersprockets who helps to review initial patch.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646366969






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345511


   **[Test build #124238 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124238/testReport)** for PR 28831 at commit [`9b12519`](https://github.com/apache/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd).
    * This patch **fails Scala style tests**.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] cloud-fan commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
cloud-fan commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645834944


   I haven't looked into the PR yet, but from the error message, AFAIK there are 2 solutions:
   1. make `isNull_2` a class variable using `ctx.addMutableState`. See `CaseWhen` as an example
   2. pass `isNull_2` as a parameter to the split methods.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345044


   **[Test build #124238 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124238/testReport)** for PR 28831 at commit [`9b12519`](https://github.com/apache/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345225






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345225


   Merged build finished. Test FAILed.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r440592664



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -123,26 +123,41 @@ case class ConcatWs(children: Seq[Expression])
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
+      val supportSplitExpressions = ctx.supportSplitExpressionsWithCurrentInputs

Review comment:
       We may be able to employ some threshold of the number of children or fallback logic if we want to also exclude the case where the number of blocks for all three parts are all 1, but that's going to be complicated a bit, so would like to hear others' voices before applying.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644583204


   **[Test build #124111 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124111/testReport)** for PR 28831 at commit [`d75ad5a`](https://github.com/apache/spark/commit/d75ad5aee7d24f4037b950137d529ed2ed3fb1cb).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644537071






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] viirya commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
viirya commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442563136



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       Don't we need to evaluate `evals.head.code`? Previously it is in `codes` but now `argBuilds` only includes `children.tail`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644575049






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644536708






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646454429


   Ah OK good to know. Thanks for the guidance! And thanks all for reviewing and merging!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-643980234


   **[Test build #124043 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124043/testReport)** for PR 28831 at commit [`3e4ffbd`](https://github.com/apache/spark/commit/3e4ffbd381f81295c76a3f8bc822adc007aae621).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644737298






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345231






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645973967


   Please go through the generated code for "before the patch". It's not just isNull - compiler just found it in prior. All variables children defines during codegen can be missing.
   
   EDIT: I guess I already did you’re suggesting in new commit, passing boolean[] and Object[].


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646420162






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442065192



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -123,26 +123,41 @@ case class ConcatWs(children: Seq[Expression])
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
+      val supportSplitExpressions = ctx.supportSplitExpressionsWithCurrentInputs
+
+      // Here we apply different approaches according to the availability of splitting expressions.

Review comment:
       I'll go through the comment. Thanks.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644026181






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] cloud-fan commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
cloud-fan commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646451995


   thanks, merging to master! (I think this one is too big to backport)


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645937769


   Thanks for the suggestion!
   
   Based on suggestion, I've experimented with option 2, but with two arrays to copy `isNull` and `value` respectively, because the number of children can be huge - existing UT sets it to 5000, which cannot be passed by parameters directly.
   
   https://github.com/HeartSaVioR/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd
   
   Below is the generated code running the same UT in PR description: 
   
   ```
   /* 001 */ public java.lang.Object generate(Object[] references) {
   /* 002 */   return new SpecificUnsafeProjection(references);
   /* 003 */ }
   /* 004 */
   /* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
   /* 006 */
   /* 007 */   private Object[] references;
   /* 008 */   private boolean globalIsNull_0;
   /* 009 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
   /* 010 */
   /* 011 */   public SpecificUnsafeProjection(Object[] references) {
   /* 012 */     this.references = references;
   /* 013 */
   /* 014 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
   /* 015 */
   /* 016 */   }
   /* 017 */
   /* 018 */   public void initialize(int partitionIndex) {
   /* 019 */
   /* 020 */   }
   /* 021 */
   /* 022 */   // Scala.Function1 need this
   /* 023 */   public java.lang.Object apply(java.lang.Object row) {
   /* 024 */     return apply((InternalRow) row);
   /* 025 */   }
   /* 026 */
   /* 027 */   public UnsafeRow apply(InternalRow i) {
   /* 028 */     mutableStateArray_0[0].reset();
   /* 029 */
   /* 030 */
   /* 031 */     mutableStateArray_0[0].zeroOutNullBytes();
   /* 032 */
   /* 033 */     UTF8String value_34 = ConcatWs_0(i);
   /* 034 */     mutableStateArray_0[0].write(0, value_34);
   /* 035 */     return (mutableStateArray_0[0].getRow());
   /* 036 */   }
   /* 037 */
   /* 038 */
   /* 039 */   private void initializeArgsArrays_0_0(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 040 */
   /* 041 */     boolean isNull_2 = i.isNullAt(31);
   /* 042 */     ArrayData value_2 = isNull_2 ?
   /* 043 */     null : (i.getArray(31));
   /* 044 */     isNullArgs_0[0] = isNull_2;
   /* 045 */     valueArgs_0[0] = value_2;
   /* 046 */
   /* 047 */     boolean isNull_3 = i.isNullAt(32);
   /* 048 */     ArrayData value_3 = isNull_3 ?
   /* 049 */     null : (i.getArray(32));
   /* 050 */     isNullArgs_0[1] = isNull_3;
   /* 051 */     valueArgs_0[1] = value_3;
   /* 052 */
   /* 053 */     UTF8String value_4 = i.getUTF8String(1);
   /* 054 */     isNullArgs_0[2] = false;
   /* 055 */     valueArgs_0[2] = value_4;
   /* 056 */
   /* 057 */     UTF8String value_5 = i.getUTF8String(2);
   /* 058 */     isNullArgs_0[3] = false;
   /* 059 */     valueArgs_0[3] = value_5;
   /* 060 */
   /* 061 */     UTF8String value_6 = i.getUTF8String(3);
   /* 062 */     isNullArgs_0[4] = false;
   /* 063 */     valueArgs_0[4] = value_6;
   /* 064 */
   /* 065 */     UTF8String value_7 = i.getUTF8String(4);
   /* 066 */     isNullArgs_0[5] = false;
   /* 067 */     valueArgs_0[5] = value_7;
   /* 068 */
   /* 069 */     UTF8String value_8 = i.getUTF8String(5);
   /* 070 */     isNullArgs_0[6] = false;
   /* 071 */     valueArgs_0[6] = value_8;
   /* 072 */
   /* 073 */   }
   /* 074 */
   /* 075 */
   /* 076 */   private void initializeArgsArrays_0_3(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 077 */
   /* 078 */     UTF8String value_25 = i.getUTF8String(22);
   /* 079 */     isNullArgs_0[23] = false;
   /* 080 */     valueArgs_0[23] = value_25;
   /* 081 */
   /* 082 */     UTF8String value_26 = i.getUTF8String(23);
   /* 083 */     isNullArgs_0[24] = false;
   /* 084 */     valueArgs_0[24] = value_26;
   /* 085 */
   /* 086 */     UTF8String value_27 = i.getUTF8String(24);
   /* 087 */     isNullArgs_0[25] = false;
   /* 088 */     valueArgs_0[25] = value_27;
   /* 089 */
   /* 090 */     UTF8String value_28 = i.getUTF8String(25);
   /* 091 */     isNullArgs_0[26] = false;
   /* 092 */     valueArgs_0[26] = value_28;
   /* 093 */
   /* 094 */     UTF8String value_29 = i.getUTF8String(26);
   /* 095 */     isNullArgs_0[27] = false;
   /* 096 */     valueArgs_0[27] = value_29;
   /* 097 */
   /* 098 */     UTF8String value_30 = i.getUTF8String(27);
   /* 099 */     isNullArgs_0[28] = false;
   /* 100 */     valueArgs_0[28] = value_30;
   /* 101 */
   /* 102 */     UTF8String value_31 = i.getUTF8String(28);
   /* 103 */     isNullArgs_0[29] = false;
   /* 104 */     valueArgs_0[29] = value_31;
   /* 105 */
   /* 106 */     UTF8String value_32 = i.getUTF8String(29);
   /* 107 */     isNullArgs_0[30] = false;
   /* 108 */     valueArgs_0[30] = value_32;
   /* 109 */
   /* 110 */   }
   /* 111 */
   /* 112 */
   /* 113 */   private int varargBuildsConcatWs_0_3(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 114 */
   /* 115 */     array_0[idxInVararg_0 ++] = isNullArgs_0[29] ? (UTF8String) null : ((UTF8String) valueArgs_0[29]);array_0[idxInVararg_0 ++] = isNullArgs_0[30] ? (UTF8String) null : ((UTF8String) valueArgs_0[30]);array_0[idxInVararg_0 ++] = isNullArgs_0[31] ? (UTF8String) null : ((UTF8String) valueArgs_0[31]);
   /* 116 */     return idxInVararg_0;
   /* 117 */
   /* 118 */   }
   /* 119 */
   /* 120 */
   /* 121 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 122 */
   /* 123 */
   /* 124 */     if (!isNullArgs_0[0]) {
   /* 125 */       final int n_0 = ((ArrayData) valueArgs_0[0]).numElements();
   /* 126 */       for (int j = 0; j < n_0; j ++) {
   /* 127 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[0]).getUTF8String(j);
   /* 128 */       }
   /* 129 */     }
   /* 130 */
   /* 131 */     if (!isNullArgs_0[1]) {
   /* 132 */       final int n_1 = ((ArrayData) valueArgs_0[1]).numElements();
   /* 133 */       for (int j = 0; j < n_1; j ++) {
   /* 134 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[1]).getUTF8String(j);
   /* 135 */       }
   /* 136 */     }
   /* 137 */     array_0[idxInVararg_0 ++] = isNullArgs_0[2] ? (UTF8String) null : ((UTF8String) valueArgs_0[2]);array_0[idxInVararg_0 ++] = isNullArgs_0[3] ? (UTF8String) null : ((UTF8String) valueArgs_0[3]);array_0[idxInVararg_0 ++] = isNullArgs_0[4] ? (UTF8String) null : ((UTF8String) valueArgs_0[4]);array_0[idxInVararg_0 ++] = isNullArgs_0[5] ? (UTF8String) null : ((UTF8String) valueArgs_0[5]);array_0[idxInVararg_0 ++] = isNullArgs_0[6] ? (UTF8String) null : ((UTF8String) valueArgs_0[6]);
   /* 138 */     return idxInVararg_0;
   /* 139 */
   /* 140 */   }
   /* 141 */
   /* 142 */
   /* 143 */   private UTF8String ConcatWs_0(InternalRow i) {
   /* 144 */     boolean[] isNullArgs_0 = new boolean[32];
   /* 145 */     Object[] valueArgs_0 = new Object[32];
   /* 146 */     initializeArgsArrays_0_0(i, isNullArgs_0, valueArgs_0);
   /* 147 */     initializeArgsArrays_0_1(i, isNullArgs_0, valueArgs_0);
   /* 148 */     initializeArgsArrays_0_2(i, isNullArgs_0, valueArgs_0);
   /* 149 */     initializeArgsArrays_0_3(i, isNullArgs_0, valueArgs_0);
   /* 150 */     initializeArgsArrays_0_4(i, isNullArgs_0, valueArgs_0);
   /* 151 */     int varargNum_0 = 30;
   /* 152 */     int idxInVararg_0 = 0;
   /* 153 */
   /* 154 */     if (!isNullArgs_0[0]) {
   /* 155 */       varargNum_0 += ((ArrayData) valueArgs_0[0]).numElements();
   /* 156 */     }
   /* 157 */
   /* 158 */     if (!isNullArgs_0[1]) {
   /* 159 */       varargNum_0 += ((ArrayData) valueArgs_0[1]).numElements();
   /* 160 */     }
   /* 161 */
   /* 162 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
   /* 163 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 164 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 165 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 166 */     idxInVararg_0 = varargBuildsConcatWs_0_3(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 167 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
   /* 168 */     boolean isNull_0 = value_0 == null;
   /* 169 */     globalIsNull_0 = isNull_0;
   /* 170 */     return value_0;
   /* 171 */   }
   /* 172 */
   /* 173 */
   /* 174 */   private void initializeArgsArrays_0_2(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 175 */
   /* 176 */     UTF8String value_17 = i.getUTF8String(14);
   /* 177 */     isNullArgs_0[15] = false;
   /* 178 */     valueArgs_0[15] = value_17;
   /* 179 */
   /* 180 */     UTF8String value_18 = i.getUTF8String(15);
   /* 181 */     isNullArgs_0[16] = false;
   /* 182 */     valueArgs_0[16] = value_18;
   /* 183 */
   /* 184 */     UTF8String value_19 = i.getUTF8String(16);
   /* 185 */     isNullArgs_0[17] = false;
   /* 186 */     valueArgs_0[17] = value_19;
   /* 187 */
   /* 188 */     UTF8String value_20 = i.getUTF8String(17);
   /* 189 */     isNullArgs_0[18] = false;
   /* 190 */     valueArgs_0[18] = value_20;
   /* 191 */
   /* 192 */     UTF8String value_21 = i.getUTF8String(18);
   /* 193 */     isNullArgs_0[19] = false;
   /* 194 */     valueArgs_0[19] = value_21;
   /* 195 */
   /* 196 */     UTF8String value_22 = i.getUTF8String(19);
   /* 197 */     isNullArgs_0[20] = false;
   /* 198 */     valueArgs_0[20] = value_22;
   /* 199 */
   /* 200 */     UTF8String value_23 = i.getUTF8String(20);
   /* 201 */     isNullArgs_0[21] = false;
   /* 202 */     valueArgs_0[21] = value_23;
   /* 203 */
   /* 204 */     UTF8String value_24 = i.getUTF8String(21);
   /* 205 */     isNullArgs_0[22] = false;
   /* 206 */     valueArgs_0[22] = value_24;
   /* 207 */
   /* 208 */   }
   /* 209 */
   /* 210 */
   /* 211 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 212 */
   /* 213 */     array_0[idxInVararg_0 ++] = isNullArgs_0[18] ? (UTF8String) null : ((UTF8String) valueArgs_0[18]);array_0[idxInVararg_0 ++] = isNullArgs_0[19] ? (UTF8String) null : ((UTF8String) valueArgs_0[19]);array_0[idxInVararg_0 ++] = isNullArgs_0[20] ? (UTF8String) null : ((UTF8String) valueArgs_0[20]);array_0[idxInVararg_0 ++] = isNullArgs_0[21] ? (UTF8String) null : ((UTF8String) valueArgs_0[21]);array_0[idxInVararg_0 ++] = isNullArgs_0[22] ? (UTF8String) null : ((UTF8String) valueArgs_0[22]);array_0[idxInVararg_0 ++] = isNullArgs_0[23] ? (UTF8String) null : ((UTF8String) valueArgs_0[23]);array_0[idxInVararg_0 ++] = isNullArgs_0[24] ? (UTF8String) null : ((UTF8String) valueArgs_0[24]);array_0[idxInVararg_0 ++] = isNullArgs_0[25] ? (UTF8String) null : ((UTF8String) valueArgs_0[25]);array_0[idxInVararg_0 ++] = isNullArgs_0[26] ? (UTF8String) null : ((UTF8String) valueArgs_0[26]);array_0[idxInVararg_0 ++] = isNullArgs_0[27] ? (UTF8String) null : ((UTF8String) valueArgs_0[27]);array_0[idxInVararg_0 ++] = isNullArgs_0[28] ? (UTF8String) null : ((UTF8String) valueArgs_0[28]);
   /* 214 */     return idxInVararg_0;
   /* 215 */
   /* 216 */   }
   /* 217 */
   /* 218 */
   /* 219 */   private void initializeArgsArrays_0_4(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 220 */
   /* 221 */     UTF8String value_33 = i.getUTF8String(30);
   /* 222 */     isNullArgs_0[31] = false;
   /* 223 */     valueArgs_0[31] = value_33;
   /* 224 */
   /* 225 */   }
   /* 226 */
   /* 227 */
   /* 228 */   private void initializeArgsArrays_0_1(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 229 */
   /* 230 */     UTF8String value_9 = i.getUTF8String(6);
   /* 231 */     isNullArgs_0[7] = false;
   /* 232 */     valueArgs_0[7] = value_9;
   /* 233 */
   /* 234 */     UTF8String value_10 = i.getUTF8String(7);
   /* 235 */     isNullArgs_0[8] = false;
   /* 236 */     valueArgs_0[8] = value_10;
   /* 237 */
   /* 238 */     UTF8String value_11 = i.getUTF8String(8);
   /* 239 */     isNullArgs_0[9] = false;
   /* 240 */     valueArgs_0[9] = value_11;
   /* 241 */
   /* 242 */     UTF8String value_12 = i.getUTF8String(9);
   /* 243 */     isNullArgs_0[10] = false;
   /* 244 */     valueArgs_0[10] = value_12;
   /* 245 */
   /* 246 */     UTF8String value_13 = i.getUTF8String(10);
   /* 247 */     isNullArgs_0[11] = false;
   /* 248 */     valueArgs_0[11] = value_13;
   /* 249 */
   /* 250 */     UTF8String value_14 = i.getUTF8String(11);
   /* 251 */     isNullArgs_0[12] = false;
   /* 252 */     valueArgs_0[12] = value_14;
   /* 253 */
   /* 254 */     UTF8String value_15 = i.getUTF8String(12);
   /* 255 */     isNullArgs_0[13] = false;
   /* 256 */     valueArgs_0[13] = value_15;
   /* 257 */
   /* 258 */     UTF8String value_16 = i.getUTF8String(13);
   /* 259 */     isNullArgs_0[14] = false;
   /* 260 */     valueArgs_0[14] = value_16;
   /* 261 */
   /* 262 */   }
   /* 263 */
   /* 264 */
   /* 265 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 266 */
   /* 267 */     array_0[idxInVararg_0 ++] = isNullArgs_0[7] ? (UTF8String) null : ((UTF8String) valueArgs_0[7]);array_0[idxInVararg_0 ++] = isNullArgs_0[8] ? (UTF8String) null : ((UTF8String) valueArgs_0[8]);array_0[idxInVararg_0 ++] = isNullArgs_0[9] ? (UTF8String) null : ((UTF8String) valueArgs_0[9]);array_0[idxInVararg_0 ++] = isNullArgs_0[10] ? (UTF8String) null : ((UTF8String) valueArgs_0[10]);array_0[idxInVararg_0 ++] = isNullArgs_0[11] ? (UTF8String) null : ((UTF8String) valueArgs_0[11]);array_0[idxInVararg_0 ++] = isNullArgs_0[12] ? (UTF8String) null : ((UTF8String) valueArgs_0[12]);array_0[idxInVararg_0 ++] = isNullArgs_0[13] ? (UTF8String) null : ((UTF8String) valueArgs_0[13]);array_0[idxInVararg_0 ++] = isNullArgs_0[14] ? (UTF8String) null : ((UTF8String) valueArgs_0[14]);array_0[idxInVararg_0 ++] = isNullArgs_0[15] ? (UTF8String) null : ((UTF8String) valueArgs_0[15]);array_0[idxInVararg_0 ++] = isNullArgs_0[16] ? (UTF8String) null : ((UTF8String) valueArgs_0[16]);array_0[idxInVararg_0 ++] = isNullArgs_0[17] ? (UTF8String) null : ((UTF8String) valueArgs_0[17]);
   /* 268 */     return idxInVararg_0;
   /* 269 */
   /* 270 */   }
   /* 271 */
   /* 272 */ }
   ```
   
   Option 1 looks also viable - I guess there's also a limitation of the number of class variables so utilizing arrays looks more flexible, but it might be really edge-case and we might want to just ignore the case.
   
   I'll rebase this PR with the commit if we prefer the new commit. If we would like to go with class variables I'll try out and provide a new commit.
   
   Thanks!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-643977050






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644583693






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] gatorsmile commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
gatorsmile commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645770760


   cc @rednaxelafx 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having any of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645725486


   Just updated the PR title and description, as I agree the current representation is not clear about whether it includes all array types or not. Thanks for pointing out. :)


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646445485






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646452038






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644583693






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] viirya commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
viirya commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442564718



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -118,47 +118,72 @@ case class ConcatWs(children: Seq[Expression])
         boolean ${ev.isNull} = ${ev.value} == null;
       """)
     } else {
+      val isNullArgs = ctx.freshName("isNullArgs")
+      val valueArgs = ctx.freshName("valueArgs")
+
       val array = ctx.freshName("array")
       val varargNum = ctx.freshName("varargNum")
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
-      val (varargCount, varargBuild) = children.tail.zip(evals.tail).map { case (child, eval) =>
-        child.dataType match {
+      val (argBuild, varargCount, varargBuild) = children.tail.zip(evals.tail)
+        .zipWithIndex.map { case ((child, eval), idx) =>
+        val reprForIsNull = s"$isNullArgs[$idx]"
+        val reprForValue = s"$valueArgs[$idx]"
+
+        val arg =
+          s"""
+           ${eval.code}
+           $reprForIsNull = ${eval.isNull};
+           $reprForValue = ${eval.value};
+           """
+
+        val (varCount, varBuild) = child.dataType match {
           case StringType =>
+            val reprForValueCast = s"((UTF8String) $valueArgs[$idx])"
             ("", // we count all the StringType arguments num at once below.
-             if (eval.isNull == TrueLiteral) {
-               ""
-             } else {
-               s"$array[$idxVararg ++] = ${eval.isNull} ? (UTF8String) null : ${eval.value};"
-             })
+              if (eval.isNull == TrueLiteral) {
+                ""
+              } else {
+                s"$array[$idxVararg ++] = $reprForIsNull ? (UTF8String) null : $reprForValueCast;"
+              })
           case _: ArrayType =>
+            val reprForValueCast = s"((ArrayData) $valueArgs[$idx])"

Review comment:
       s"((ArrayData) $reprForValue)"

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -118,47 +118,72 @@ case class ConcatWs(children: Seq[Expression])
         boolean ${ev.isNull} = ${ev.value} == null;
       """)
     } else {
+      val isNullArgs = ctx.freshName("isNullArgs")
+      val valueArgs = ctx.freshName("valueArgs")
+
       val array = ctx.freshName("array")
       val varargNum = ctx.freshName("varargNum")
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
-      val (varargCount, varargBuild) = children.tail.zip(evals.tail).map { case (child, eval) =>
-        child.dataType match {
+      val (argBuild, varargCount, varargBuild) = children.tail.zip(evals.tail)
+        .zipWithIndex.map { case ((child, eval), idx) =>
+        val reprForIsNull = s"$isNullArgs[$idx]"
+        val reprForValue = s"$valueArgs[$idx]"
+
+        val arg =
+          s"""
+           ${eval.code}
+           $reprForIsNull = ${eval.isNull};
+           $reprForValue = ${eval.value};
+           """
+
+        val (varCount, varBuild) = child.dataType match {
           case StringType =>
+            val reprForValueCast = s"((UTF8String) $valueArgs[$idx])"

Review comment:
       s"((UTF8String) $reprForValue)"?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [WIP][SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644519334






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646366637


   **[Test build #124242 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124242/testReport)** for PR 28831 at commit [`e8f972d`](https://github.com/apache/spark/commit/e8f972db9baf05778339d01335e1422155672017).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] kiszk commented on a change in pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
kiszk commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442030030



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -123,26 +123,41 @@ case class ConcatWs(children: Seq[Expression])
       val idxVararg = ctx.freshName("idxInVararg")
 
       val evals = children.map(_.genCode(ctx))
+      val supportSplitExpressions = ctx.supportSplitExpressionsWithCurrentInputs
+
+      // Here we apply different approaches according to the availability of splitting expressions.

Review comment:
       It would be good to use one approach to reduce the complexity. Could you please see @cloud-fan 's comment?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644574859






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644575049






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645660608


   Let me hopefully try to expand the chance of review. cc. @cloud-fan @maropu as well


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644026190


   Test FAILed.
   Refer to this link for build results (access rights to CI server needed): 
   https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/124043/
   Test FAILed.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644736363


   **[Test build #124111 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124111/testReport)** for PR 28831 at commit [`d75ad5a`](https://github.com/apache/spark/commit/d75ad5aee7d24f4037b950137d529ed2ed3fb1cb).
    * This patch passes all tests.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646349614






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644582455


   retest this, please


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [WIP][SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644519334






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-643980234


   **[Test build #124043 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124043/testReport)** for PR 28831 at commit [`3e4ffbd`](https://github.com/apache/spark/commit/3e4ffbd381f81295c76a3f8bc822adc007aae621).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646451372


   **[Test build #124242 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124242/testReport)** for PR 28831 at commit [`e8f972d`](https://github.com/apache/spark/commit/e8f972db9baf05778339d01335e1422155672017).
    * This patch passes all tests.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644537071






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644538409


   cc. @kiszk @viirya as initial reviewers as I can see they're contributed the code part.
   cc. @bersprockets who helped to review initial patch.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-643977050






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644120378


   My bad I missed the existing test, as I target on older Spark version originally. The existing UT works with this patch, because we don't create local variables for dealing with literals (it just seems to leverage global array.) 
   
   Looks like we should deal with splitting code block in any way. Let me change the PR to WIP for now and try to come up with another solution.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646420162






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646349387


   **[Test build #124239 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124239/testReport)** for PR 28831 at commit [`22ab4ff`](https://github.com/apache/spark/commit/22ab4ff15cc8b2029d356d1be76e0e7e130438dd).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] cloud-fan commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
cloud-fan commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645959510


   If we only need to pass the `isNull` in the parameters, maybe we can use `boolean[]` so that the number of children doesn't matter?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442563872



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       ~~If I understand correctly, evals.head is a delimiter (so that doesn't need to be coupled with varargXXX), and it's used in `UTF8String.concatWs`.~~




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644538409


   cc. @kiszk @viirya as initial reviewers as I can see they're contributed the code part.
   cc. @bersprockets who helps to review initial patch.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645973967


   It's not just isNull - compiler just found it in prior. All variables children defines during codegen can be missing.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646349387


   **[Test build #124239 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124239/testReport)** for PR 28831 at commit [`22ab4ff`](https://github.com/apache/spark/commit/22ab4ff15cc8b2029d356d1be76e0e7e130438dd).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442564593



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       Ah OK I see the case, where delimiter is not literal. I'll add it. Thanks!




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646368886






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] maropu commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
maropu commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645723272


   This issue can happen only in the mixed case? What if all the argument are arrays?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345044


   **[Test build #124238 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124238/testReport)** for PR 28831 at commit [`9b12519`](https://github.com/apache/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646452038






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644536708


   **[Test build #124096 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124096/testReport)** for PR 28831 at commit [`d75ad5a`](https://github.com/apache/spark/commit/d75ad5aee7d24f4037b950137d529ed2ed3fb1cb).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646368886






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646345523






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [WIP][SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644519042


   **[Test build #124089 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124089/testReport)** for PR 28831 at commit [`623aed0`](https://github.com/apache/spark/commit/623aed05f7614f880b5c7acbc9a6a18bf5d1e67d).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646444772


   **[Test build #124243 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124243/testReport)** for PR 28831 at commit [`2477d11`](https://github.com/apache/spark/commit/2477d119fd1de18a07e02d0b0d5e19f268472d9b).
    * This patch passes all tests.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442563872



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       If I understand correctly, evals.head is a delimiter, and it's used in `UTF8String.concatWs`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] cloud-fan closed pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
cloud-fan closed pull request #28831:
URL: https://github.com/apache/spark/pull/28831


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646366969






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645973967


   Please go through the generated code for "before the patch". It's not just isNull - compiler just found it in prior. All variables children defines during codegen can be missing.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644583204


   **[Test build #124111 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124111/testReport)** for PR 28831 at commit [`d75ad5a`](https://github.com/apache/spark/commit/d75ad5aee7d24f4037b950137d529ed2ed3fb1cb).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on a change in pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on a change in pull request #28831:
URL: https://github.com/apache/spark/pull/28831#discussion_r442563872



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -182,7 +208,9 @@ case class ConcatWs(children: Seq[Expression])
 
       ev.copy(
         code"""
-        $codes
+        boolean[] $isNullArgs = new boolean[${children.length - 1}];
+        Object[] $valueArgs = new Object[${children.length - 1}];
+        $argBuilds

Review comment:
       If I understand correctly, evals.head is a delimiter (so that doesn't need to be coupled with varargXXX), and it's used in `UTF8String.concatWs`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645937769


   Thanks for the suggestion!
   
   Based on suggestion, I've experimented with option 2, but with two arrays to copy `isNull` and `value` respectively, because the number of children can be huge - existing UT sets it to 5000, which cannot be passed by parameters directly.
   
   https://github.com/HeartSaVioR/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd
   
   Below is the generated code running the same UT in PR description: 
   
   ```
   /* 001 */ public java.lang.Object generate(Object[] references) {
   /* 002 */   return new SpecificUnsafeProjection(references);
   /* 003 */ }
   /* 004 */
   /* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
   /* 006 */
   /* 007 */   private Object[] references;
   /* 008 */   private boolean globalIsNull_0;
   /* 009 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
   /* 010 */
   /* 011 */   public SpecificUnsafeProjection(Object[] references) {
   /* 012 */     this.references = references;
   /* 013 */
   /* 014 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
   /* 015 */
   /* 016 */   }
   /* 017 */
   /* 018 */   public void initialize(int partitionIndex) {
   /* 019 */
   /* 020 */   }
   /* 021 */
   /* 022 */   // Scala.Function1 need this
   /* 023 */   public java.lang.Object apply(java.lang.Object row) {
   /* 024 */     return apply((InternalRow) row);
   /* 025 */   }
   /* 026 */
   /* 027 */   public UnsafeRow apply(InternalRow i) {
   /* 028 */     mutableStateArray_0[0].reset();
   /* 029 */
   /* 030 */
   /* 031 */     mutableStateArray_0[0].zeroOutNullBytes();
   /* 032 */
   /* 033 */     UTF8String value_34 = ConcatWs_0(i);
   /* 034 */     mutableStateArray_0[0].write(0, value_34);
   /* 035 */     return (mutableStateArray_0[0].getRow());
   /* 036 */   }
   /* 037 */
   /* 038 */
   /* 039 */   private void initializeArgsArrays_0_0(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 040 */
   /* 041 */     boolean isNull_2 = i.isNullAt(31);
   /* 042 */     ArrayData value_2 = isNull_2 ?
   /* 043 */     null : (i.getArray(31));
   /* 044 */     isNullArgs_0[0] = isNull_2;
   /* 045 */     valueArgs_0[0] = value_2;
   /* 046 */
   /* 047 */     boolean isNull_3 = i.isNullAt(32);
   /* 048 */     ArrayData value_3 = isNull_3 ?
   /* 049 */     null : (i.getArray(32));
   /* 050 */     isNullArgs_0[1] = isNull_3;
   /* 051 */     valueArgs_0[1] = value_3;
   /* 052 */
   /* 053 */     UTF8String value_4 = i.getUTF8String(1);
   /* 054 */     isNullArgs_0[2] = false;
   /* 055 */     valueArgs_0[2] = value_4;
   /* 056 */
   /* 057 */     UTF8String value_5 = i.getUTF8String(2);
   /* 058 */     isNullArgs_0[3] = false;
   /* 059 */     valueArgs_0[3] = value_5;
   /* 060 */
   /* 061 */     UTF8String value_6 = i.getUTF8String(3);
   /* 062 */     isNullArgs_0[4] = false;
   /* 063 */     valueArgs_0[4] = value_6;
   /* 064 */
   /* 065 */     UTF8String value_7 = i.getUTF8String(4);
   /* 066 */     isNullArgs_0[5] = false;
   /* 067 */     valueArgs_0[5] = value_7;
   /* 068 */
   /* 069 */     UTF8String value_8 = i.getUTF8String(5);
   /* 070 */     isNullArgs_0[6] = false;
   /* 071 */     valueArgs_0[6] = value_8;
   /* 072 */
   /* 073 */   }
   /* 074 */
   /* 075 */
   /* 076 */   private void initializeArgsArrays_0_3(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 077 */
   /* 078 */     UTF8String value_25 = i.getUTF8String(22);
   /* 079 */     isNullArgs_0[23] = false;
   /* 080 */     valueArgs_0[23] = value_25;
   /* 081 */
   /* 082 */     UTF8String value_26 = i.getUTF8String(23);
   /* 083 */     isNullArgs_0[24] = false;
   /* 084 */     valueArgs_0[24] = value_26;
   /* 085 */
   /* 086 */     UTF8String value_27 = i.getUTF8String(24);
   /* 087 */     isNullArgs_0[25] = false;
   /* 088 */     valueArgs_0[25] = value_27;
   /* 089 */
   /* 090 */     UTF8String value_28 = i.getUTF8String(25);
   /* 091 */     isNullArgs_0[26] = false;
   /* 092 */     valueArgs_0[26] = value_28;
   /* 093 */
   /* 094 */     UTF8String value_29 = i.getUTF8String(26);
   /* 095 */     isNullArgs_0[27] = false;
   /* 096 */     valueArgs_0[27] = value_29;
   /* 097 */
   /* 098 */     UTF8String value_30 = i.getUTF8String(27);
   /* 099 */     isNullArgs_0[28] = false;
   /* 100 */     valueArgs_0[28] = value_30;
   /* 101 */
   /* 102 */     UTF8String value_31 = i.getUTF8String(28);
   /* 103 */     isNullArgs_0[29] = false;
   /* 104 */     valueArgs_0[29] = value_31;
   /* 105 */
   /* 106 */     UTF8String value_32 = i.getUTF8String(29);
   /* 107 */     isNullArgs_0[30] = false;
   /* 108 */     valueArgs_0[30] = value_32;
   /* 109 */
   /* 110 */   }
   /* 111 */
   /* 112 */
   /* 113 */   private int varargBuildsConcatWs_0_3(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 114 */
   /* 115 */     array_0[idxInVararg_0 ++] = isNullArgs_0[29] ? (UTF8String) null : ((UTF8String) valueArgs_0[29]);array_0[idxInVararg_0 ++] = isNullArgs_0[30] ? (UTF8String) null : ((UTF8String) valueArgs_0[30]);array_0[idxInVararg_0 ++] = isNullArgs_0[31] ? (UTF8String) null : ((UTF8String) valueArgs_0[31]);
   /* 116 */     return idxInVararg_0;
   /* 117 */
   /* 118 */   }
   /* 119 */
   /* 120 */
   /* 121 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 122 */
   /* 123 */
   /* 124 */     if (!isNullArgs_0[0]) {
   /* 125 */       final int n_0 = ((ArrayData) valueArgs_0[0]).numElements();
   /* 126 */       for (int j = 0; j < n_0; j ++) {
   /* 127 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[0]).getUTF8String(j);
   /* 128 */       }
   /* 129 */     }
   /* 130 */
   /* 131 */     if (!isNullArgs_0[1]) {
   /* 132 */       final int n_1 = ((ArrayData) valueArgs_0[1]).numElements();
   /* 133 */       for (int j = 0; j < n_1; j ++) {
   /* 134 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[1]).getUTF8String(j);
   /* 135 */       }
   /* 136 */     }
   /* 137 */     array_0[idxInVararg_0 ++] = isNullArgs_0[2] ? (UTF8String) null : ((UTF8String) valueArgs_0[2]);array_0[idxInVararg_0 ++] = isNullArgs_0[3] ? (UTF8String) null : ((UTF8String) valueArgs_0[3]);array_0[idxInVararg_0 ++] = isNullArgs_0[4] ? (UTF8String) null : ((UTF8String) valueArgs_0[4]);array_0[idxInVararg_0 ++] = isNullArgs_0[5] ? (UTF8String) null : ((UTF8String) valueArgs_0[5]);array_0[idxInVararg_0 ++] = isNullArgs_0[6] ? (UTF8String) null : ((UTF8String) valueArgs_0[6]);
   /* 138 */     return idxInVararg_0;
   /* 139 */
   /* 140 */   }
   /* 141 */
   /* 142 */
   /* 143 */   private UTF8String ConcatWs_0(InternalRow i) {
   /* 144 */     boolean[] isNullArgs_0 = new boolean[32];
   /* 145 */     Object[] valueArgs_0 = new Object[32];
   /* 146 */     initializeArgsArrays_0_0(i, isNullArgs_0, valueArgs_0);
   /* 147 */     initializeArgsArrays_0_1(i, isNullArgs_0, valueArgs_0);
   /* 148 */     initializeArgsArrays_0_2(i, isNullArgs_0, valueArgs_0);
   /* 149 */     initializeArgsArrays_0_3(i, isNullArgs_0, valueArgs_0);
   /* 150 */     initializeArgsArrays_0_4(i, isNullArgs_0, valueArgs_0);
   /* 151 */     int varargNum_0 = 30;
   /* 152 */     int idxInVararg_0 = 0;
   /* 153 */
   /* 154 */     if (!isNullArgs_0[0]) {
   /* 155 */       varargNum_0 += ((ArrayData) valueArgs_0[0]).numElements();
   /* 156 */     }
   /* 157 */
   /* 158 */     if (!isNullArgs_0[1]) {
   /* 159 */       varargNum_0 += ((ArrayData) valueArgs_0[1]).numElements();
   /* 160 */     }
   /* 161 */
   /* 162 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
   /* 163 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 164 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 165 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 166 */     idxInVararg_0 = varargBuildsConcatWs_0_3(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 167 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
   /* 168 */     boolean isNull_0 = value_0 == null;
   /* 169 */     globalIsNull_0 = isNull_0;
   /* 170 */     return value_0;
   /* 171 */   }
   /* 172 */
   /* 173 */
   /* 174 */   private void initializeArgsArrays_0_2(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 175 */
   /* 176 */     UTF8String value_17 = i.getUTF8String(14);
   /* 177 */     isNullArgs_0[15] = false;
   /* 178 */     valueArgs_0[15] = value_17;
   /* 179 */
   /* 180 */     UTF8String value_18 = i.getUTF8String(15);
   /* 181 */     isNullArgs_0[16] = false;
   /* 182 */     valueArgs_0[16] = value_18;
   /* 183 */
   /* 184 */     UTF8String value_19 = i.getUTF8String(16);
   /* 185 */     isNullArgs_0[17] = false;
   /* 186 */     valueArgs_0[17] = value_19;
   /* 187 */
   /* 188 */     UTF8String value_20 = i.getUTF8String(17);
   /* 189 */     isNullArgs_0[18] = false;
   /* 190 */     valueArgs_0[18] = value_20;
   /* 191 */
   /* 192 */     UTF8String value_21 = i.getUTF8String(18);
   /* 193 */     isNullArgs_0[19] = false;
   /* 194 */     valueArgs_0[19] = value_21;
   /* 195 */
   /* 196 */     UTF8String value_22 = i.getUTF8String(19);
   /* 197 */     isNullArgs_0[20] = false;
   /* 198 */     valueArgs_0[20] = value_22;
   /* 199 */
   /* 200 */     UTF8String value_23 = i.getUTF8String(20);
   /* 201 */     isNullArgs_0[21] = false;
   /* 202 */     valueArgs_0[21] = value_23;
   /* 203 */
   /* 204 */     UTF8String value_24 = i.getUTF8String(21);
   /* 205 */     isNullArgs_0[22] = false;
   /* 206 */     valueArgs_0[22] = value_24;
   /* 207 */
   /* 208 */   }
   /* 209 */
   /* 210 */
   /* 211 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 212 */
   /* 213 */     array_0[idxInVararg_0 ++] = isNullArgs_0[18] ? (UTF8String) null : ((UTF8String) valueArgs_0[18]);array_0[idxInVararg_0 ++] = isNullArgs_0[19] ? (UTF8String) null : ((UTF8String) valueArgs_0[19]);array_0[idxInVararg_0 ++] = isNullArgs_0[20] ? (UTF8String) null : ((UTF8String) valueArgs_0[20]);array_0[idxInVararg_0 ++] = isNullArgs_0[21] ? (UTF8String) null : ((UTF8String) valueArgs_0[21]);array_0[idxInVararg_0 ++] = isNullArgs_0[22] ? (UTF8String) null : ((UTF8String) valueArgs_0[22]);array_0[idxInVararg_0 ++] = isNullArgs_0[23] ? (UTF8String) null : ((UTF8String) valueArgs_0[23]);array_0[idxInVararg_0 ++] = isNullArgs_0[24] ? (UTF8String) null : ((UTF8String) valueArgs_0[24]);array_0[idxInVararg_0 ++] = isNullArgs_0[25] ? (UTF8String) null : ((UTF8String) valueArgs_0[25]);array_0[idxInVararg_0 ++] = isNullArgs_0[26] ? (UTF8String) null : ((UTF8String) valueArgs_0[26]);array_0[idxInVararg_0 ++] = isNullArgs_0[27] ? (UTF8String) null : ((UTF8String) valueArgs_0[27]);array_0[idxInVararg_0 ++] = isNullArgs_0[28] ? (UTF8String) null : ((UTF8String) valueArgs_0[28]);
   /* 214 */     return idxInVararg_0;
   /* 215 */
   /* 216 */   }
   /* 217 */
   /* 218 */
   /* 219 */   private void initializeArgsArrays_0_4(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 220 */
   /* 221 */     UTF8String value_33 = i.getUTF8String(30);
   /* 222 */     isNullArgs_0[31] = false;
   /* 223 */     valueArgs_0[31] = value_33;
   /* 224 */
   /* 225 */   }
   /* 226 */
   /* 227 */
   /* 228 */   private void initializeArgsArrays_0_1(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 229 */
   /* 230 */     UTF8String value_9 = i.getUTF8String(6);
   /* 231 */     isNullArgs_0[7] = false;
   /* 232 */     valueArgs_0[7] = value_9;
   /* 233 */
   /* 234 */     UTF8String value_10 = i.getUTF8String(7);
   /* 235 */     isNullArgs_0[8] = false;
   /* 236 */     valueArgs_0[8] = value_10;
   /* 237 */
   /* 238 */     UTF8String value_11 = i.getUTF8String(8);
   /* 239 */     isNullArgs_0[9] = false;
   /* 240 */     valueArgs_0[9] = value_11;
   /* 241 */
   /* 242 */     UTF8String value_12 = i.getUTF8String(9);
   /* 243 */     isNullArgs_0[10] = false;
   /* 244 */     valueArgs_0[10] = value_12;
   /* 245 */
   /* 246 */     UTF8String value_13 = i.getUTF8String(10);
   /* 247 */     isNullArgs_0[11] = false;
   /* 248 */     valueArgs_0[11] = value_13;
   /* 249 */
   /* 250 */     UTF8String value_14 = i.getUTF8String(11);
   /* 251 */     isNullArgs_0[12] = false;
   /* 252 */     valueArgs_0[12] = value_14;
   /* 253 */
   /* 254 */     UTF8String value_15 = i.getUTF8String(12);
   /* 255 */     isNullArgs_0[13] = false;
   /* 256 */     valueArgs_0[13] = value_15;
   /* 257 */
   /* 258 */     UTF8String value_16 = i.getUTF8String(13);
   /* 259 */     isNullArgs_0[14] = false;
   /* 260 */     valueArgs_0[14] = value_16;
   /* 261 */
   /* 262 */   }
   /* 263 */
   /* 264 */
   /* 265 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 266 */
   /* 267 */     array_0[idxInVararg_0 ++] = isNullArgs_0[7] ? (UTF8String) null : ((UTF8String) valueArgs_0[7]);array_0[idxInVararg_0 ++] = isNullArgs_0[8] ? (UTF8String) null : ((UTF8String) valueArgs_0[8]);array_0[idxInVararg_0 ++] = isNullArgs_0[9] ? (UTF8String) null : ((UTF8String) valueArgs_0[9]);array_0[idxInVararg_0 ++] = isNullArgs_0[10] ? (UTF8String) null : ((UTF8String) valueArgs_0[10]);array_0[idxInVararg_0 ++] = isNullArgs_0[11] ? (UTF8String) null : ((UTF8String) valueArgs_0[11]);array_0[idxInVararg_0 ++] = isNullArgs_0[12] ? (UTF8String) null : ((UTF8String) valueArgs_0[12]);array_0[idxInVararg_0 ++] = isNullArgs_0[13] ? (UTF8String) null : ((UTF8String) valueArgs_0[13]);array_0[idxInVararg_0 ++] = isNullArgs_0[14] ? (UTF8String) null : ((UTF8String) valueArgs_0[14]);array_0[idxInVararg_0 ++] = isNullArgs_0[15] ? (UTF8String) null : ((UTF8String) valueArgs_0[15]);array_0[idxInVararg_0 ++] = isNullArgs_0[16] ? (UTF8String) null : ((UTF8String) valueArgs_0[16]);array_0[idxInVararg_0 ++] = isNullArgs_0[17] ? (UTF8String) null : ((UTF8String) valueArgs_0[17]);
   /* 268 */     return idxInVararg_0;
   /* 269 */
   /* 270 */   }
   /* 271 */
   /* 272 */ }
   ```
   
   Option 1 looks also viable - I guess there's also an limitation of the number of class variables so utilizing arrays looks more flexible, but it might be really edge-case and we might want to just ignore the case.
   
   I'll rebase this PR with the commit if we prefer the new commit. If we would like to go with class variables I'll try out and provide a new commit.
   
   Thanks!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins removed a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
AmplabJenkins removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644737298






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA removed a comment on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA removed a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646368574


   **[Test build #124243 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124243/testReport)** for PR 28831 at commit [`2477d11`](https://github.com/apache/spark/commit/2477d119fd1de18a07e02d0b0d5e19f268472d9b).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646419702


   **[Test build #124239 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124239/testReport)** for PR 28831 at commit [`22ab4ff`](https://github.com/apache/spark/commit/22ab4ff15cc8b2029d356d1be76e0e7e130438dd).
    * This patch passes all tests.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-644025890


   **[Test build #124043 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124043/testReport)** for PR 28831 at commit [`3e4ffbd`](https://github.com/apache/spark/commit/3e4ffbd381f81295c76a3f8bc822adc007aae621).
    * This patch **fails Spark unit tests**.
    * This patch merges cleanly.
    * This patch adds no public classes.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] AmplabJenkins commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
AmplabJenkins commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646349614






----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] SparkQA commented on pull request #28831: [SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
SparkQA commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646368574


   **[Test build #124243 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/124243/testReport)** for PR 28831 at commit [`2477d11`](https://github.com/apache/spark/commit/2477d119fd1de18a07e02d0b0d5e19f268472d9b).


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-646338960


   Let me rebase as the new commit seems to match the suggestion.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


[GitHub] [spark] HeartSaVioR commented on pull request #28831: [SPARK-31993][SQL] Don't split code blocks in generated code for 'concat_ws' for mixed string/array types of columns

Posted by GitBox <gi...@apache.org>.
HeartSaVioR commented on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-643978420


   There might be still a chance to compose these all parts into one and pass to splitExpressionsWithCurrentInputs, but it requires rewrite of code because there're two different "state" variables instead of one. It would be nice if we are OK to leave up the optimization to the separate PR.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org