You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2016/11/28 12:18:40 UTC

spark git commit: [SPARK-18118][SQL] fix a compilation error due to nested JavaBeans

Repository: spark
Updated Branches:
  refs/heads/master 454b80499 -> f075cd9cb


[SPARK-18118][SQL] fix a compilation error due to nested JavaBeans

## What changes were proposed in this pull request?

This PR avoids a compilation error due to more than 64KB Java byte code size. This error occur since generated java code `SpecificSafeProjection.apply()` for nested JavaBeans is too big. This PR avoids this compilation error by splitting a big code chunk into multiple methods by calling `CodegenContext.splitExpression` at `InitializeJavaBean.doGenCode`
An object reference for JavaBean is stored to an instance variable `javaBean...`. Then, the instance variable will be referenced in the split methods.

Generated code with this PR
````
/* 22098 */   private void apply130_0(InternalRow i) {
...
/* 22125 */     boolean isNull238 = i.isNullAt(2);
/* 22126 */     InternalRow value238 = isNull238 ? null : (i.getStruct(2, 3));
/* 22127 */     boolean isNull236 = false;
/* 22128 */     test.org.apache.spark.sql.JavaDatasetSuite$Nesting1 value236 = null;
/* 22129 */     if (!false && isNull238) {
/* 22130 */
/* 22131 */       final test.org.apache.spark.sql.JavaDatasetSuite$Nesting1 value239 = null;
/* 22132 */       isNull236 = true;
/* 22133 */       value236 = value239;
/* 22134 */     } else {
/* 22135 */
/* 22136 */       final test.org.apache.spark.sql.JavaDatasetSuite$Nesting1 value241 = false ? null : new test.org.apache.spark.sql.JavaDatasetSuite$Nesting1();
/* 22137 */       this.javaBean14 = value241;
/* 22138 */       if (!false) {
/* 22139 */         apply25_0(i);
/* 22140 */         apply25_1(i);
/* 22141 */         apply25_2(i);
/* 22142 */       }
/* 22143 */       isNull236 = false;
/* 22144 */       value236 = value241;
/* 22145 */     }
/* 22146 */     this.javaBean.setField2(value236);
/* 22147 */
/* 22148 */   }
...
/* 22928 */   public java.lang.Object apply(java.lang.Object _i) {
/* 22929 */     InternalRow i = (InternalRow) _i;
/* 22930 */
/* 22931 */     final test.org.apache.spark.sql.JavaDatasetSuite$NestedComplicatedJavaBean value1 = false ? null : new test.org.apache.spark.sql.JavaDatasetSuite$NestedComplicatedJavaBean();
/* 22932 */     this.javaBean = value1;
/* 22933 */     if (!false) {
/* 22934 */       apply130_0(i);
/* 22935 */       apply130_1(i);
/* 22936 */       apply130_2(i);
/* 22937 */       apply130_3(i);
/* 22938 */       apply130_4(i);
/* 22939 */     }
/* 22940 */     if (false) {
/* 22941 */       mutableRow.setNullAt(0);
/* 22942 */     } else {
/* 22943 */
/* 22944 */       mutableRow.update(0, value1);
/* 22945 */     }
/* 22946 */
/* 22947 */     return mutableRow;
/* 22948 */   }
````

## How was this patch tested?

added a test suite into `JavaDatasetSuite.java`

Author: Kazuaki Ishizaki <is...@jp.ibm.com>

Closes #16032 from kiszk/SPARK-18118.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f075cd9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f075cd9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f075cd9c

Branch: refs/heads/master
Commit: f075cd9cb7157819df9aec67baee8913c4ed5c53
Parents: 454b804
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Mon Nov 28 04:18:35 2016 -0800
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Mon Nov 28 04:18:35 2016 -0800

----------------------------------------------------------------------
 .../catalyst/expressions/objects/objects.scala  |  10 +-
 .../org/apache/spark/sql/JavaDatasetSuite.java  | 429 +++++++++++++++++++
 2 files changed, 437 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f075cd9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 5c27179..6952f54 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -896,19 +896,25 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val instanceGen = beanInstance.genCode(ctx)
 
+    val javaBeanInstance = ctx.freshName("javaBean")
+    val beanInstanceJavaType = ctx.javaType(beanInstance.dataType)
+    ctx.addMutableState(beanInstanceJavaType, javaBeanInstance, "")
+
     val initialize = setters.map {
       case (setterMethod, fieldValue) =>
         val fieldGen = fieldValue.genCode(ctx)
         s"""
            ${fieldGen.code}
-           ${instanceGen.value}.$setterMethod(${fieldGen.value});
+           this.${javaBeanInstance}.$setterMethod(${fieldGen.value});
          """
     }
+    val initializeCode = ctx.splitExpressions(ctx.INPUT_ROW, initialize.toSeq)
 
     val code = s"""
       ${instanceGen.code}
+      this.${javaBeanInstance} = ${instanceGen.value};
       if (!${instanceGen.isNull}) {
-        ${initialize.mkString("\n")}
+        $initializeCode
       }
      """
     ev.copy(code = code, isNull = instanceGen.isNull, value = instanceGen.value)

http://git-wip-us.apache.org/repos/asf/spark/blob/f075cd9c/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 96e8fb06..8304b72 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -876,4 +876,433 @@ public class JavaDatasetSuite implements Serializable {
       ds.collect();
     }
   }
+
+  public static class Nesting3 implements Serializable {
+    private Integer field3_1;
+    private Double field3_2;
+    private String field3_3;
+
+    public Nesting3() {
+    }
+
+    public Nesting3(Integer field3_1, Double field3_2, String field3_3) {
+      this.field3_1 = field3_1;
+      this.field3_2 = field3_2;
+      this.field3_3 = field3_3;
+    }
+
+    private Nesting3(Builder builder) {
+      setField3_1(builder.field3_1);
+      setField3_2(builder.field3_2);
+      setField3_3(builder.field3_3);
+    }
+
+    public static Builder newBuilder() {
+      return new Builder();
+    }
+
+    public Integer getField3_1() {
+      return field3_1;
+    }
+
+    public void setField3_1(Integer field3_1) {
+      this.field3_1 = field3_1;
+    }
+
+    public Double getField3_2() {
+      return field3_2;
+    }
+
+    public void setField3_2(Double field3_2) {
+      this.field3_2 = field3_2;
+    }
+
+    public String getField3_3() {
+      return field3_3;
+    }
+
+    public void setField3_3(String field3_3) {
+      this.field3_3 = field3_3;
+    }
+
+    public static final class Builder {
+      private Integer field3_1 = 0;
+      private Double field3_2 = 0.0;
+      private String field3_3 = "value";
+
+      private Builder() {
+      }
+
+      public Builder field3_1(Integer field3_1) {
+        this.field3_1 = field3_1;
+        return this;
+      }
+
+      public Builder field3_2(Double field3_2) {
+        this.field3_2 = field3_2;
+        return this;
+      }
+
+      public Builder field3_3(String field3_3) {
+        this.field3_3 = field3_3;
+        return this;
+      }
+
+      public Nesting3 build() {
+        return new Nesting3(this);
+      }
+    }
+  }
+
+  public static class Nesting2 implements Serializable {
+    private Nesting3 field2_1;
+    private Nesting3 field2_2;
+    private Nesting3 field2_3;
+
+    public Nesting2() {
+    }
+
+    public Nesting2(Nesting3 field2_1, Nesting3 field2_2, Nesting3 field2_3) {
+      this.field2_1 = field2_1;
+      this.field2_2 = field2_2;
+      this.field2_3 = field2_3;
+    }
+
+    private Nesting2(Builder builder) {
+      setField2_1(builder.field2_1);
+      setField2_2(builder.field2_2);
+      setField2_3(builder.field2_3);
+    }
+
+    public static Builder newBuilder() {
+      return new Builder();
+    }
+
+    public Nesting3 getField2_1() {
+      return field2_1;
+    }
+
+    public void setField2_1(Nesting3 field2_1) {
+      this.field2_1 = field2_1;
+    }
+
+    public Nesting3 getField2_2() {
+      return field2_2;
+    }
+
+    public void setField2_2(Nesting3 field2_2) {
+      this.field2_2 = field2_2;
+    }
+
+    public Nesting3 getField2_3() {
+      return field2_3;
+    }
+
+    public void setField2_3(Nesting3 field2_3) {
+      this.field2_3 = field2_3;
+    }
+
+
+    public static final class Builder {
+      private Nesting3 field2_1 = Nesting3.newBuilder().build();
+      private Nesting3 field2_2 = Nesting3.newBuilder().build();
+      private Nesting3 field2_3 = Nesting3.newBuilder().build();
+
+      private Builder() {
+      }
+
+      public Builder field2_1(Nesting3 field2_1) {
+        this.field2_1 = field2_1;
+        return this;
+      }
+
+      public Builder field2_2(Nesting3 field2_2) {
+        this.field2_2 = field2_2;
+        return this;
+      }
+
+      public Builder field2_3(Nesting3 field2_3) {
+        this.field2_3 = field2_3;
+        return this;
+      }
+
+      public Nesting2 build() {
+        return new Nesting2(this);
+      }
+    }
+  }
+
+  public static class Nesting1 implements Serializable {
+    private Nesting2 field1_1;
+    private Nesting2 field1_2;
+    private Nesting2 field1_3;
+
+    public Nesting1() {
+    }
+
+    public Nesting1(Nesting2 field1_1, Nesting2 field1_2, Nesting2 field1_3) {
+      this.field1_1 = field1_1;
+      this.field1_2 = field1_2;
+      this.field1_3 = field1_3;
+    }
+
+    private Nesting1(Builder builder) {
+      setField1_1(builder.field1_1);
+      setField1_2(builder.field1_2);
+      setField1_3(builder.field1_3);
+    }
+
+    public static Builder newBuilder() {
+      return new Builder();
+    }
+
+    public Nesting2 getField1_1() {
+      return field1_1;
+    }
+
+    public void setField1_1(Nesting2 field1_1) {
+      this.field1_1 = field1_1;
+    }
+
+    public Nesting2 getField1_2() {
+      return field1_2;
+    }
+
+    public void setField1_2(Nesting2 field1_2) {
+      this.field1_2 = field1_2;
+    }
+
+    public Nesting2 getField1_3() {
+      return field1_3;
+    }
+
+    public void setField1_3(Nesting2 field1_3) {
+      this.field1_3 = field1_3;
+    }
+
+
+    public static final class Builder {
+      private Nesting2 field1_1 = Nesting2.newBuilder().build();
+      private Nesting2 field1_2 = Nesting2.newBuilder().build();
+      private Nesting2 field1_3 = Nesting2.newBuilder().build();
+
+      private Builder() {
+      }
+
+      public Builder field1_1(Nesting2 field1_1) {
+        this.field1_1 = field1_1;
+        return this;
+      }
+
+      public Builder field1_2(Nesting2 field1_2) {
+        this.field1_2 = field1_2;
+        return this;
+      }
+
+      public Builder field1_3(Nesting2 field1_3) {
+        this.field1_3 = field1_3;
+        return this;
+      }
+
+      public Nesting1 build() {
+        return new Nesting1(this);
+      }
+    }
+  }
+
+  public static class NestedComplicatedJavaBean implements Serializable {
+    private Nesting1 field1;
+    private Nesting1 field2;
+    private Nesting1 field3;
+    private Nesting1 field4;
+    private Nesting1 field5;
+    private Nesting1 field6;
+    private Nesting1 field7;
+    private Nesting1 field8;
+    private Nesting1 field9;
+    private Nesting1 field10;
+
+    public NestedComplicatedJavaBean() {
+    }
+
+    private NestedComplicatedJavaBean(Builder builder) {
+      setField1(builder.field1);
+      setField2(builder.field2);
+      setField3(builder.field3);
+      setField4(builder.field4);
+      setField5(builder.field5);
+      setField6(builder.field6);
+      setField7(builder.field7);
+      setField8(builder.field8);
+      setField9(builder.field9);
+      setField10(builder.field10);
+    }
+
+    public static Builder newBuilder() {
+      return new Builder();
+    }
+
+    public Nesting1 getField1() {
+      return field1;
+    }
+
+    public void setField1(Nesting1 field1) {
+      this.field1 = field1;
+    }
+
+    public Nesting1 getField2() {
+      return field2;
+    }
+
+    public void setField2(Nesting1 field2) {
+      this.field2 = field2;
+    }
+
+    public Nesting1 getField3() {
+      return field3;
+    }
+
+    public void setField3(Nesting1 field3) {
+      this.field3 = field3;
+    }
+
+    public Nesting1 getField4() {
+      return field4;
+    }
+
+    public void setField4(Nesting1 field4) {
+      this.field4 = field4;
+    }
+
+    public Nesting1 getField5() {
+      return field5;
+    }
+
+    public void setField5(Nesting1 field5) {
+      this.field5 = field5;
+    }
+
+    public Nesting1 getField6() {
+      return field6;
+    }
+
+    public void setField6(Nesting1 field6) {
+      this.field6 = field6;
+    }
+
+    public Nesting1 getField7() {
+      return field7;
+    }
+
+    public void setField7(Nesting1 field7) {
+      this.field7 = field7;
+    }
+
+    public Nesting1 getField8() {
+      return field8;
+    }
+
+    public void setField8(Nesting1 field8) {
+      this.field8 = field8;
+    }
+
+    public Nesting1 getField9() {
+      return field9;
+    }
+
+    public void setField9(Nesting1 field9) {
+      this.field9 = field9;
+    }
+
+    public Nesting1 getField10() {
+      return field10;
+    }
+
+    public void setField10(Nesting1 field10) {
+      this.field10 = field10;
+    }
+
+    public static final class Builder {
+      private Nesting1 field1 = Nesting1.newBuilder().build();
+      private Nesting1 field2 = Nesting1.newBuilder().build();
+      private Nesting1 field3 = Nesting1.newBuilder().build();
+      private Nesting1 field4 = Nesting1.newBuilder().build();
+      private Nesting1 field5 = Nesting1.newBuilder().build();
+      private Nesting1 field6 = Nesting1.newBuilder().build();
+      private Nesting1 field7 = Nesting1.newBuilder().build();
+      private Nesting1 field8 = Nesting1.newBuilder().build();
+      private Nesting1 field9 = Nesting1.newBuilder().build();
+      private Nesting1 field10 = Nesting1.newBuilder().build();
+
+      private Builder() {
+      }
+
+      public Builder field1(Nesting1 field1) {
+        this.field1 = field1;
+        return this;
+      }
+
+      public Builder field2(Nesting1 field2) {
+        this.field2 = field2;
+        return this;
+      }
+
+      public Builder field3(Nesting1 field3) {
+        this.field3 = field3;
+        return this;
+      }
+
+      public Builder field4(Nesting1 field4) {
+        this.field4 = field4;
+        return this;
+      }
+
+      public Builder field5(Nesting1 field5) {
+        this.field5 = field5;
+        return this;
+      }
+
+      public Builder field6(Nesting1 field6) {
+        this.field6 = field6;
+        return this;
+      }
+
+      public Builder field7(Nesting1 field7) {
+        this.field7 = field7;
+        return this;
+      }
+
+      public Builder field8(Nesting1 field8) {
+        this.field8 = field8;
+        return this;
+      }
+
+      public Builder field9(Nesting1 field9) {
+        this.field9 = field9;
+        return this;
+      }
+
+      public Builder field10(Nesting1 field10) {
+        this.field10 = field10;
+        return this;
+      }
+
+      public NestedComplicatedJavaBean build() {
+        return new NestedComplicatedJavaBean(this);
+      }
+    }
+  }
+
+  @Test
+  public void test() {
+    /* SPARK-15285 Large numbers of Nested JavaBeans generates more than 64KB java bytecode */
+    List<NestedComplicatedJavaBean> data = new ArrayList<NestedComplicatedJavaBean>();
+    data.add(NestedComplicatedJavaBean.newBuilder().build());
+
+    NestedComplicatedJavaBean obj3 = new NestedComplicatedJavaBean();
+
+    Dataset<NestedComplicatedJavaBean> ds =
+      spark.createDataset(data, Encoders.bean(NestedComplicatedJavaBean.class));
+    ds.collectAsList();
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org