You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/08/05 02:28:49 UTC

[zeppelin] branch master updated: [ZEPPELIN-5473] Enable z.show for R

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 64e3e07  [ZEPPELIN-5473] Enable z.show for R
64e3e07 is described below

commit 64e3e0730dbc2d1206876c968fef0bb028fe5efe
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Wed Aug 4 00:04:16 2021 +0800

    [ZEPPELIN-5473] Enable z.show for R
    
    ### What is this PR for?
    
    Enable `z.show` in `%r.ir` to make it consistent across languages. Unit test and docs are added.
    
    ### What type of PR is it?
    [Improvement]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5473
    
    ### How should this be tested?
    * CI passes and manually tested
    
    ### Screenshots (if appropriate)
    ![image](https://user-images.githubusercontent.com/164491/127757473-032de603-9273-4a7a-8803-f5c1f0051f82.png)
    
    ![image](https://user-images.githubusercontent.com/164491/127953841-97cd4139-ae51-4c3f-9f8c-962d1e8f7139.png)
    
    ### Questions:
    * Do the license files need an update? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #4193 from zjffdu/ZEPPELIN-5473 and squashes the following commits:
    
    394b19c4c0 [Jeff Zhang] use zeppelin.R.maxResult to configure maxRows
    b4aef573d9 [Jeff Zhang] Add limit for z.show
    1cbb5876f7 [Jeff Zhang] [ZEPPELIN-5473] Enable z.show for R
---
 .../themes/zeppelin/img/docs-img/r_zshow.png       | Bin 0 -> 93797 bytes
 docs/interpreter/r.md                              |   4 ++
 .../java/org/apache/zeppelin/r/IRInterpreter.java  |  12 ++++++
 .../java/org/apache/zeppelin/r/RInterpreter.java   |   2 +-
 rlang/src/main/resources/R/zeppelin_isparkr.R      |  20 ++++++++-
 rlang/src/main/resources/interpreter-setting.json  |   7 +++
 .../org/apache/zeppelin/r/IRInterpreterTest.java   |  47 +++++++++++++++++++++
 rlang/src/test/resources/log4j.properties          |   2 +-
 .../interpreter/InterpreterOutputTest.java         |  10 +++++
 9 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png b/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png
new file mode 100644
index 0000000..5803410
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png differ
diff --git a/docs/interpreter/r.md b/docs/interpreter/r.md
index 15bbe2c..ef61f4b 100644
--- a/docs/interpreter/r.md
+++ b/docs/interpreter/r.md
@@ -162,7 +162,11 @@ Besides R base plotting, you can use other visualization library, e.g. `ggplot`
 
 <img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_googlevis.png" width="800px"/>
 
+You can also use `z.show()` in `%r.ir` to visualize the dataframe, e.g.
 
+<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_zshow.png" width="800px"/>
+
+  
 ## Make Shiny App in Zeppelin
 
 [Shiny](https://shiny.rstudio.com/tutorial/) is an R package that makes it easy to build interactive web applications (apps) straight from R.
diff --git a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
index ef6d60d..ef79fd5 100644
--- a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
+++ b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
@@ -48,6 +48,7 @@ import java.util.Properties;
 public class IRInterpreter extends JupyterKernelInterpreter {
 
   private static final Logger LOGGER = LoggerFactory.getLogger(IRInterpreter.class);
+  private static RZeppelinContext z;
 
   // It is used to store shiny related code (ui.R & server.R)
   // only one shiny app can be hosted in one R session.
@@ -104,6 +105,13 @@ public class IRInterpreter extends JupyterKernelInterpreter {
       }
     }
 
+    synchronized (IRInterpreter.class) {
+      if (this.z == null) {
+        z = new RZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(),
+                Integer.parseInt(getProperty("zeppelin.R.maxResult", "1000")));
+      }
+    }
+
     try {
       initIRKernel();
     } catch (IOException e) {
@@ -197,4 +205,8 @@ public class IRInterpreter extends JupyterKernelInterpreter {
       getKernelProcessLauncher().setRedirectedContext(null);
     }
   }
+
+  public static RZeppelinContext getRZeppelinContext() {
+    return z;
+  }
 }
diff --git a/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java b/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
index 4a7af23..2a72c9c 100644
--- a/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
+++ b/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
@@ -98,7 +98,7 @@ public class RInterpreter extends AbstractInterpreter {
     synchronized (RInterpreter.class) {
       if (this.z == null) {
         z = new RZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(),
-                Integer.parseInt(getProperty("zeppelin.r.maxResult", "1000")));
+                Integer.parseInt(getProperty("zeppelin.R.maxResult", "1000")));
       }
     }
     this.renderOptions = getProperty("zeppelin.R.render.options",
diff --git a/rlang/src/main/resources/R/zeppelin_isparkr.R b/rlang/src/main/resources/R/zeppelin_isparkr.R
index f279ee1..7c67f14 100644
--- a/rlang/src/main/resources/R/zeppelin_isparkr.R
+++ b/rlang/src/main/resources/R/zeppelin_isparkr.R
@@ -37,6 +37,7 @@ if (is.null(authSecret) || authSecret == '') {
 
 # scStartTime is needed by R/pkg/R/sparkR.R
 assign(".scStartTime", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)
+assign(".maxRows", as.integer())
 
 if (isSparkSupported == "true") {
   # setup spark env
@@ -51,7 +52,7 @@ if (isSparkSupported == "true") {
   assign("sqlContext", get(".sqlc", envir = SparkR:::.sparkREnv), envir = .GlobalEnv)
   assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getZeppelinContext"), envir = .GlobalEnv)
 } else {
-  assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.r.RInterpreter", "getRZeppelinContext"), envir = .GlobalEnv)
+  assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.r.IRInterpreter", "getRZeppelinContext"), envir = .GlobalEnv)
 }
 
 z.put <- function(name, object) {
@@ -113,4 +114,21 @@ z.angularBind <- function(name, value) {
 
 z.angularUnbind <- function(name, value) {
   SparkR:::callJMethod(.zeppelinContext, "angularUnbind", name)
+}
+
+z.show <- function(data, maxRows=SparkR:::callJMethod(.zeppelinContext, "getMaxResult")) {
+  if (is.data.frame(data)) {
+    resultString = c(paste(colnames(data),  collapse ="\t"))
+    for (row in 1: min(nrow(data), maxRows)) {
+      rowString <- paste(data[row,], collapse ="\t")
+      resultString = c(resultString, rowString)
+    }
+    a=paste(resultString, collapse="\n")
+    cat("\n%table ", a, "\n\n%text ", sep="")
+    if (nrow(data) > maxRows) {
+      cat("\n%html <font color=red>Results are limited by ", maxRows, " rows.</font>", "\n%text ", sep="")
+    }
+  } else {
+    cat(data)
+  }
 }
\ No newline at end of file
diff --git a/rlang/src/main/resources/interpreter-setting.json b/rlang/src/main/resources/interpreter-setting.json
index f0a9677..654c1ee 100644
--- a/rlang/src/main/resources/interpreter-setting.json
+++ b/rlang/src/main/resources/interpreter-setting.json
@@ -18,6 +18,13 @@
         "description": "R binary executable path",
         "type": "string"
       },
+      "zeppelin.R.maxResult": {
+        "envName": null,
+        "propertyName": "zeppelin.R.maxResult",
+        "defaultValue": "1000",
+        "description": "Max number of dataframe rows to display.",
+        "type": "number"
+      },
       "zeppelin.R.image.width": {
         "envName": "ZEPPELIN_R_IMAGE_WIDTH",
         "propertyName": "zeppelin.R.image.width",
diff --git a/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java b/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
index 44a45e4..bd8a3b5 100644
--- a/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
+++ b/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
@@ -19,12 +19,21 @@ package org.apache.zeppelin.r;
 
 import org.apache.zeppelin.interpreter.Interpreter;
 import org.apache.zeppelin.interpreter.InterpreterContext;
+import org.apache.zeppelin.interpreter.InterpreterException;
 import org.apache.zeppelin.interpreter.InterpreterOutput;
+import org.apache.zeppelin.interpreter.InterpreterResult;
+import org.apache.zeppelin.interpreter.InterpreterResultMessage;
 import org.apache.zeppelin.jupyter.IRKernelTest;
+import org.junit.Test;
 
+import java.io.IOException;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Properties;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 
 public class IRInterpreterTest extends IRKernelTest {
 
@@ -43,4 +52,42 @@ public class IRInterpreterTest extends IRKernelTest {
             .build();
     return context;
   }
+
+  @Test
+  public void testZShow() throws InterpreterException, IOException {
+    InterpreterContext context = getInterpreterContext();
+    InterpreterResult result = interpreter.interpret(
+            "df=data.frame(country=c(\"US\", \"GB\", \"BR\"),\n" +
+            "val1=c(10,13,14),\n" +
+            "val2=c(23,12,32))", context);
+    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+
+    context = getInterpreterContext();
+    result = interpreter.interpret("z.show(df)", context);
+    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+    List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
+    assertEquals(1, resultMessages.size());
+    assertEquals(resultMessages.toString(),
+            InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
+    assertEquals("country\tval1\tval2\n" +
+                    "3\t10\t23\n" +
+                    "2\t13\t12\n" +
+                    "1\t14\t32\n",
+            resultMessages.get(0).getData());
+
+    context = getInterpreterContext();
+    result = interpreter.interpret("z.show(df, maxRows=1)", context);
+    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+    resultMessages = context.out.toInterpreterResultMessage();
+    assertEquals(2, resultMessages.size());
+    assertEquals(resultMessages.toString(),
+            InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
+    assertEquals("country\tval1\tval2\n" +
+                    "3\t10\t23\n",
+            resultMessages.get(0).getData());
+    assertEquals(resultMessages.toString(),
+            InterpreterResult.Type.HTML, resultMessages.get(1).getType());
+    assertEquals("<font color=red>Results are limited by 1 rows.</font>\n",
+            resultMessages.get(1).getData());
+  }
 }
diff --git a/rlang/src/test/resources/log4j.properties b/rlang/src/test/resources/log4j.properties
index ed2672f..fa19880 100644
--- a/rlang/src/test/resources/log4j.properties
+++ b/rlang/src/test/resources/log4j.properties
@@ -24,4 +24,4 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
 log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
 
 #log4j.logger.org.apache.zeppelin.interpreter.util=DEBUG
-#log4j.logger.org.apache.zeppelin.jupyter=DEBUG
+log4j.logger.org.apache.zeppelin.jupyter=DEBUG
diff --git a/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java b/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
index 195bfcb..e07ec62 100644
--- a/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
+++ b/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
@@ -164,6 +164,16 @@ public class InterpreterOutputTest implements InterpreterOutputListener {
   }
 
   @Test
+  public void testMixTableText() throws IOException {
+    out.write("%table col1\tcol2\n\n%text val1".getBytes());
+    assertEquals(InterpreterResult.Type.TABLE, out.getOutputAt(0).getType());
+    assertEquals("col1\tcol2\n", new String(out.getOutputAt(0).toByteArray()));
+    out.flush();
+    assertEquals(InterpreterResult.Type.TEXT, out.getOutputAt(1).getType());
+    assertEquals("val1", new String(out.getOutputAt(1).toByteArray()));
+  }
+
+  @Test
   public void testTruncate() throws IOException {
     // output is truncated after the new line
     InterpreterOutput.LIMIT = 3;