You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/06/18 10:44:48 UTC

[GitHub] [spark] HeartSaVioR edited a comment on pull request #28831: [SPARK-31993][SQL] Evaluate children code whenever needed in both varargCounts/varargBuilds for 'concat_ws' with columns having at least one of array type

HeartSaVioR edited a comment on pull request #28831:
URL: https://github.com/apache/spark/pull/28831#issuecomment-645937769


   Thanks for the suggestion!
   
   Based on suggestion, I've experimented with option 2, but with two arrays to copy `isNull` and `value` respectively, because the number of children can be huge - existing UT sets it to 5000, which cannot be passed by parameters directly.
   
   https://github.com/HeartSaVioR/spark/commit/9b12519d3b1a72a14164710f1e42727ca70fc4dd
   
   Below is the generated code running the same UT in PR description: 
   
   ```
   /* 001 */ public java.lang.Object generate(Object[] references) {
   /* 002 */   return new SpecificUnsafeProjection(references);
   /* 003 */ }
   /* 004 */
   /* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
   /* 006 */
   /* 007 */   private Object[] references;
   /* 008 */   private boolean globalIsNull_0;
   /* 009 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
   /* 010 */
   /* 011 */   public SpecificUnsafeProjection(Object[] references) {
   /* 012 */     this.references = references;
   /* 013 */
   /* 014 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
   /* 015 */
   /* 016 */   }
   /* 017 */
   /* 018 */   public void initialize(int partitionIndex) {
   /* 019 */
   /* 020 */   }
   /* 021 */
   /* 022 */   // Scala.Function1 need this
   /* 023 */   public java.lang.Object apply(java.lang.Object row) {
   /* 024 */     return apply((InternalRow) row);
   /* 025 */   }
   /* 026 */
   /* 027 */   public UnsafeRow apply(InternalRow i) {
   /* 028 */     mutableStateArray_0[0].reset();
   /* 029 */
   /* 030 */
   /* 031 */     mutableStateArray_0[0].zeroOutNullBytes();
   /* 032 */
   /* 033 */     UTF8String value_34 = ConcatWs_0(i);
   /* 034 */     mutableStateArray_0[0].write(0, value_34);
   /* 035 */     return (mutableStateArray_0[0].getRow());
   /* 036 */   }
   /* 037 */
   /* 038 */
   /* 039 */   private void initializeArgsArrays_0_0(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 040 */
   /* 041 */     boolean isNull_2 = i.isNullAt(31);
   /* 042 */     ArrayData value_2 = isNull_2 ?
   /* 043 */     null : (i.getArray(31));
   /* 044 */     isNullArgs_0[0] = isNull_2;
   /* 045 */     valueArgs_0[0] = value_2;
   /* 046 */
   /* 047 */     boolean isNull_3 = i.isNullAt(32);
   /* 048 */     ArrayData value_3 = isNull_3 ?
   /* 049 */     null : (i.getArray(32));
   /* 050 */     isNullArgs_0[1] = isNull_3;
   /* 051 */     valueArgs_0[1] = value_3;
   /* 052 */
   /* 053 */     UTF8String value_4 = i.getUTF8String(1);
   /* 054 */     isNullArgs_0[2] = false;
   /* 055 */     valueArgs_0[2] = value_4;
   /* 056 */
   /* 057 */     UTF8String value_5 = i.getUTF8String(2);
   /* 058 */     isNullArgs_0[3] = false;
   /* 059 */     valueArgs_0[3] = value_5;
   /* 060 */
   /* 061 */     UTF8String value_6 = i.getUTF8String(3);
   /* 062 */     isNullArgs_0[4] = false;
   /* 063 */     valueArgs_0[4] = value_6;
   /* 064 */
   /* 065 */     UTF8String value_7 = i.getUTF8String(4);
   /* 066 */     isNullArgs_0[5] = false;
   /* 067 */     valueArgs_0[5] = value_7;
   /* 068 */
   /* 069 */     UTF8String value_8 = i.getUTF8String(5);
   /* 070 */     isNullArgs_0[6] = false;
   /* 071 */     valueArgs_0[6] = value_8;
   /* 072 */
   /* 073 */   }
   /* 074 */
   /* 075 */
   /* 076 */   private void initializeArgsArrays_0_3(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 077 */
   /* 078 */     UTF8String value_25 = i.getUTF8String(22);
   /* 079 */     isNullArgs_0[23] = false;
   /* 080 */     valueArgs_0[23] = value_25;
   /* 081 */
   /* 082 */     UTF8String value_26 = i.getUTF8String(23);
   /* 083 */     isNullArgs_0[24] = false;
   /* 084 */     valueArgs_0[24] = value_26;
   /* 085 */
   /* 086 */     UTF8String value_27 = i.getUTF8String(24);
   /* 087 */     isNullArgs_0[25] = false;
   /* 088 */     valueArgs_0[25] = value_27;
   /* 089 */
   /* 090 */     UTF8String value_28 = i.getUTF8String(25);
   /* 091 */     isNullArgs_0[26] = false;
   /* 092 */     valueArgs_0[26] = value_28;
   /* 093 */
   /* 094 */     UTF8String value_29 = i.getUTF8String(26);
   /* 095 */     isNullArgs_0[27] = false;
   /* 096 */     valueArgs_0[27] = value_29;
   /* 097 */
   /* 098 */     UTF8String value_30 = i.getUTF8String(27);
   /* 099 */     isNullArgs_0[28] = false;
   /* 100 */     valueArgs_0[28] = value_30;
   /* 101 */
   /* 102 */     UTF8String value_31 = i.getUTF8String(28);
   /* 103 */     isNullArgs_0[29] = false;
   /* 104 */     valueArgs_0[29] = value_31;
   /* 105 */
   /* 106 */     UTF8String value_32 = i.getUTF8String(29);
   /* 107 */     isNullArgs_0[30] = false;
   /* 108 */     valueArgs_0[30] = value_32;
   /* 109 */
   /* 110 */   }
   /* 111 */
   /* 112 */
   /* 113 */   private int varargBuildsConcatWs_0_3(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 114 */
   /* 115 */     array_0[idxInVararg_0 ++] = isNullArgs_0[29] ? (UTF8String) null : ((UTF8String) valueArgs_0[29]);array_0[idxInVararg_0 ++] = isNullArgs_0[30] ? (UTF8String) null : ((UTF8String) valueArgs_0[30]);array_0[idxInVararg_0 ++] = isNullArgs_0[31] ? (UTF8String) null : ((UTF8String) valueArgs_0[31]);
   /* 116 */     return idxInVararg_0;
   /* 117 */
   /* 118 */   }
   /* 119 */
   /* 120 */
   /* 121 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 122 */
   /* 123 */
   /* 124 */     if (!isNullArgs_0[0]) {
   /* 125 */       final int n_0 = ((ArrayData) valueArgs_0[0]).numElements();
   /* 126 */       for (int j = 0; j < n_0; j ++) {
   /* 127 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[0]).getUTF8String(j);
   /* 128 */       }
   /* 129 */     }
   /* 130 */
   /* 131 */     if (!isNullArgs_0[1]) {
   /* 132 */       final int n_1 = ((ArrayData) valueArgs_0[1]).numElements();
   /* 133 */       for (int j = 0; j < n_1; j ++) {
   /* 134 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[1]).getUTF8String(j);
   /* 135 */       }
   /* 136 */     }
   /* 137 */     array_0[idxInVararg_0 ++] = isNullArgs_0[2] ? (UTF8String) null : ((UTF8String) valueArgs_0[2]);array_0[idxInVararg_0 ++] = isNullArgs_0[3] ? (UTF8String) null : ((UTF8String) valueArgs_0[3]);array_0[idxInVararg_0 ++] = isNullArgs_0[4] ? (UTF8String) null : ((UTF8String) valueArgs_0[4]);array_0[idxInVararg_0 ++] = isNullArgs_0[5] ? (UTF8String) null : ((UTF8String) valueArgs_0[5]);array_0[idxInVararg_0 ++] = isNullArgs_0[6] ? (UTF8String) null : ((UTF8String) valueArgs_0[6]);
   /* 138 */     return idxInVararg_0;
   /* 139 */
   /* 140 */   }
   /* 141 */
   /* 142 */
   /* 143 */   private UTF8String ConcatWs_0(InternalRow i) {
   /* 144 */     boolean[] isNullArgs_0 = new boolean[32];
   /* 145 */     Object[] valueArgs_0 = new Object[32];
   /* 146 */     initializeArgsArrays_0_0(i, isNullArgs_0, valueArgs_0);
   /* 147 */     initializeArgsArrays_0_1(i, isNullArgs_0, valueArgs_0);
   /* 148 */     initializeArgsArrays_0_2(i, isNullArgs_0, valueArgs_0);
   /* 149 */     initializeArgsArrays_0_3(i, isNullArgs_0, valueArgs_0);
   /* 150 */     initializeArgsArrays_0_4(i, isNullArgs_0, valueArgs_0);
   /* 151 */     int varargNum_0 = 30;
   /* 152 */     int idxInVararg_0 = 0;
   /* 153 */
   /* 154 */     if (!isNullArgs_0[0]) {
   /* 155 */       varargNum_0 += ((ArrayData) valueArgs_0[0]).numElements();
   /* 156 */     }
   /* 157 */
   /* 158 */     if (!isNullArgs_0[1]) {
   /* 159 */       varargNum_0 += ((ArrayData) valueArgs_0[1]).numElements();
   /* 160 */     }
   /* 161 */
   /* 162 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
   /* 163 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 164 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 165 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 166 */     idxInVararg_0 = varargBuildsConcatWs_0_3(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
   /* 167 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
   /* 168 */     boolean isNull_0 = value_0 == null;
   /* 169 */     globalIsNull_0 = isNull_0;
   /* 170 */     return value_0;
   /* 171 */   }
   /* 172 */
   /* 173 */
   /* 174 */   private void initializeArgsArrays_0_2(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 175 */
   /* 176 */     UTF8String value_17 = i.getUTF8String(14);
   /* 177 */     isNullArgs_0[15] = false;
   /* 178 */     valueArgs_0[15] = value_17;
   /* 179 */
   /* 180 */     UTF8String value_18 = i.getUTF8String(15);
   /* 181 */     isNullArgs_0[16] = false;
   /* 182 */     valueArgs_0[16] = value_18;
   /* 183 */
   /* 184 */     UTF8String value_19 = i.getUTF8String(16);
   /* 185 */     isNullArgs_0[17] = false;
   /* 186 */     valueArgs_0[17] = value_19;
   /* 187 */
   /* 188 */     UTF8String value_20 = i.getUTF8String(17);
   /* 189 */     isNullArgs_0[18] = false;
   /* 190 */     valueArgs_0[18] = value_20;
   /* 191 */
   /* 192 */     UTF8String value_21 = i.getUTF8String(18);
   /* 193 */     isNullArgs_0[19] = false;
   /* 194 */     valueArgs_0[19] = value_21;
   /* 195 */
   /* 196 */     UTF8String value_22 = i.getUTF8String(19);
   /* 197 */     isNullArgs_0[20] = false;
   /* 198 */     valueArgs_0[20] = value_22;
   /* 199 */
   /* 200 */     UTF8String value_23 = i.getUTF8String(20);
   /* 201 */     isNullArgs_0[21] = false;
   /* 202 */     valueArgs_0[21] = value_23;
   /* 203 */
   /* 204 */     UTF8String value_24 = i.getUTF8String(21);
   /* 205 */     isNullArgs_0[22] = false;
   /* 206 */     valueArgs_0[22] = value_24;
   /* 207 */
   /* 208 */   }
   /* 209 */
   /* 210 */
   /* 211 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 212 */
   /* 213 */     array_0[idxInVararg_0 ++] = isNullArgs_0[18] ? (UTF8String) null : ((UTF8String) valueArgs_0[18]);array_0[idxInVararg_0 ++] = isNullArgs_0[19] ? (UTF8String) null : ((UTF8String) valueArgs_0[19]);array_0[idxInVararg_0 ++] = isNullArgs_0[20] ? (UTF8String) null : ((UTF8String) valueArgs_0[20]);array_0[idxInVararg_0 ++] = isNullArgs_0[21] ? (UTF8String) null : ((UTF8String) valueArgs_0[21]);array_0[idxInVararg_0 ++] = isNullArgs_0[22] ? (UTF8String) null : ((UTF8String) valueArgs_0[22]);array_0[idxInVararg_0 ++] = isNullArgs_0[23] ? (UTF8String) null : ((UTF8String) valueArgs_0[23]);array_0[idxInVararg_0 ++] = isNullArgs_0[24] ? (UTF8String) null : ((UTF8String) valueArgs_0[24]);array_0[idxInVararg_0 ++] = isNullArgs_0[25] ? (UTF8String) null : ((UTF8String) valueArgs_0[25]);array_0[idxInVararg_0 ++] = isNullArgs_0[26] ? (UTF8String) null : ((UTF8String) valueArgs_0[26]);array_0[idxInVararg_0 ++] = isNullArgs_0[27] ? (UTF8String) null : ((UTF8String) valueArgs_0[27]);array_0[idxInVararg_0 ++] = isNullArgs_0[28] ? (UTF8String) null : ((UTF8String) valueArgs_0[28]);
   /* 214 */     return idxInVararg_0;
   /* 215 */
   /* 216 */   }
   /* 217 */
   /* 218 */
   /* 219 */   private void initializeArgsArrays_0_4(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 220 */
   /* 221 */     UTF8String value_33 = i.getUTF8String(30);
   /* 222 */     isNullArgs_0[31] = false;
   /* 223 */     valueArgs_0[31] = value_33;
   /* 224 */
   /* 225 */   }
   /* 226 */
   /* 227 */
   /* 228 */   private void initializeArgsArrays_0_1(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 229 */
   /* 230 */     UTF8String value_9 = i.getUTF8String(6);
   /* 231 */     isNullArgs_0[7] = false;
   /* 232 */     valueArgs_0[7] = value_9;
   /* 233 */
   /* 234 */     UTF8String value_10 = i.getUTF8String(7);
   /* 235 */     isNullArgs_0[8] = false;
   /* 236 */     valueArgs_0[8] = value_10;
   /* 237 */
   /* 238 */     UTF8String value_11 = i.getUTF8String(8);
   /* 239 */     isNullArgs_0[9] = false;
   /* 240 */     valueArgs_0[9] = value_11;
   /* 241 */
   /* 242 */     UTF8String value_12 = i.getUTF8String(9);
   /* 243 */     isNullArgs_0[10] = false;
   /* 244 */     valueArgs_0[10] = value_12;
   /* 245 */
   /* 246 */     UTF8String value_13 = i.getUTF8String(10);
   /* 247 */     isNullArgs_0[11] = false;
   /* 248 */     valueArgs_0[11] = value_13;
   /* 249 */
   /* 250 */     UTF8String value_14 = i.getUTF8String(11);
   /* 251 */     isNullArgs_0[12] = false;
   /* 252 */     valueArgs_0[12] = value_14;
   /* 253 */
   /* 254 */     UTF8String value_15 = i.getUTF8String(12);
   /* 255 */     isNullArgs_0[13] = false;
   /* 256 */     valueArgs_0[13] = value_15;
   /* 257 */
   /* 258 */     UTF8String value_16 = i.getUTF8String(13);
   /* 259 */     isNullArgs_0[14] = false;
   /* 260 */     valueArgs_0[14] = value_16;
   /* 261 */
   /* 262 */   }
   /* 263 */
   /* 264 */
   /* 265 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
   /* 266 */
   /* 267 */     array_0[idxInVararg_0 ++] = isNullArgs_0[7] ? (UTF8String) null : ((UTF8String) valueArgs_0[7]);array_0[idxInVararg_0 ++] = isNullArgs_0[8] ? (UTF8String) null : ((UTF8String) valueArgs_0[8]);array_0[idxInVararg_0 ++] = isNullArgs_0[9] ? (UTF8String) null : ((UTF8String) valueArgs_0[9]);array_0[idxInVararg_0 ++] = isNullArgs_0[10] ? (UTF8String) null : ((UTF8String) valueArgs_0[10]);array_0[idxInVararg_0 ++] = isNullArgs_0[11] ? (UTF8String) null : ((UTF8String) valueArgs_0[11]);array_0[idxInVararg_0 ++] = isNullArgs_0[12] ? (UTF8String) null : ((UTF8String) valueArgs_0[12]);array_0[idxInVararg_0 ++] = isNullArgs_0[13] ? (UTF8String) null : ((UTF8String) valueArgs_0[13]);array_0[idxInVararg_0 ++] = isNullArgs_0[14] ? (UTF8String) null : ((UTF8String) valueArgs_0[14]);array_0[idxInVararg_0 ++] = isNullArgs_0[15] ? (UTF8String) null : ((UTF8String) valueArgs_0[15]);array_0[idxInVararg_0 ++] = isNullArgs_0[16] ? (UTF8String) null : ((UTF8String) valueArgs_0[16]);array_0[idxInVararg_0 ++] = isNullArgs_0[17] ? (UTF8String) null : ((UTF8String) valueArgs_0[17]);
   /* 268 */     return idxInVararg_0;
   /* 269 */
   /* 270 */   }
   /* 271 */
   /* 272 */ }
   ```
   
   Option 1 looks also viable - I guess there's also a limitation of the number of class variables so utilizing arrays looks more flexible, but it might be really edge-case and we might want to just ignore the case.
   
   I'll rebase this PR with the commit if we prefer the new commit. If we would like to go with class variables I'll try out and provide a new commit.
   
   Thanks!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org