You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/08/05 02:28:49 UTC
[zeppelin] branch master updated: [ZEPPELIN-5473] Enable z.show for R
This is an automated email from the ASF dual-hosted git repository.
zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new 64e3e07 [ZEPPELIN-5473] Enable z.show for R
64e3e07 is described below
commit 64e3e0730dbc2d1206876c968fef0bb028fe5efe
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Wed Aug 4 00:04:16 2021 +0800
[ZEPPELIN-5473] Enable z.show for R
### What is this PR for?
Enable `z.show` in `%r.ir` to make it consistent across languages. A unit test and docs are added.
### What type of PR is it?
[Improvement]
### Todos
* [ ] - Task
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-5473
### How should this be tested?
* CI passes; also manually tested
### Screenshots (if appropriate)
![image](https://user-images.githubusercontent.com/164491/127757473-032de603-9273-4a7a-8803-f5c1f0051f82.png)
![image](https://user-images.githubusercontent.com/164491/127953841-97cd4139-ae51-4c3f-9f8c-962d1e8f7139.png)
### Questions:
* Do the license files need an update? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Jeff Zhang <zj...@apache.org>
Closes #4193 from zjffdu/ZEPPELIN-5473 and squashes the following commits:
394b19c4c0 [Jeff Zhang] use zeppelin.R.maxResult to configure maxRows
b4aef573d9 [Jeff Zhang] Add limit for z.show
1cbb5876f7 [Jeff Zhang] [ZEPPELIN-5473] Enable z.show for R
---
.../themes/zeppelin/img/docs-img/r_zshow.png | Bin 0 -> 93797 bytes
docs/interpreter/r.md | 4 ++
.../java/org/apache/zeppelin/r/IRInterpreter.java | 12 ++++++
.../java/org/apache/zeppelin/r/RInterpreter.java | 2 +-
rlang/src/main/resources/R/zeppelin_isparkr.R | 20 ++++++++-
rlang/src/main/resources/interpreter-setting.json | 7 +++
.../org/apache/zeppelin/r/IRInterpreterTest.java | 47 +++++++++++++++++++++
rlang/src/test/resources/log4j.properties | 2 +-
.../interpreter/InterpreterOutputTest.java | 10 +++++
9 files changed, 101 insertions(+), 3 deletions(-)
diff --git a/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png b/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png
new file mode 100644
index 0000000..5803410
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/r_zshow.png differ
diff --git a/docs/interpreter/r.md b/docs/interpreter/r.md
index 15bbe2c..ef61f4b 100644
--- a/docs/interpreter/r.md
+++ b/docs/interpreter/r.md
@@ -162,7 +162,11 @@ Besides R base plotting, you can use other visualization library, e.g. `ggplot`
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_googlevis.png" width="800px"/>
+You can also use `z.show()` in `%r.ir` to visualize the dataframe, e.g.
+<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_zshow.png" width="800px"/>
+
+
## Make Shiny App in Zeppelin
[Shiny](https://shiny.rstudio.com/tutorial/) is an R package that makes it easy to build interactive web applications (apps) straight from R.
diff --git a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
index ef6d60d..ef79fd5 100644
--- a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
+++ b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java
@@ -48,6 +48,7 @@ import java.util.Properties;
public class IRInterpreter extends JupyterKernelInterpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(IRInterpreter.class);
+ private static RZeppelinContext z;
// It is used to store shiny related code (ui.R & server.R)
// only one shiny app can be hosted in one R session.
@@ -104,6 +105,13 @@ public class IRInterpreter extends JupyterKernelInterpreter {
}
}
+ synchronized (IRInterpreter.class) {
+ if (this.z == null) {
+ z = new RZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(),
+ Integer.parseInt(getProperty("zeppelin.R.maxResult", "1000")));
+ }
+ }
+
try {
initIRKernel();
} catch (IOException e) {
@@ -197,4 +205,8 @@ public class IRInterpreter extends JupyterKernelInterpreter {
getKernelProcessLauncher().setRedirectedContext(null);
}
}
+
+ public static RZeppelinContext getRZeppelinContext() {
+ return z;
+ }
}
diff --git a/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java b/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
index 4a7af23..2a72c9c 100644
--- a/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
+++ b/rlang/src/main/java/org/apache/zeppelin/r/RInterpreter.java
@@ -98,7 +98,7 @@ public class RInterpreter extends AbstractInterpreter {
synchronized (RInterpreter.class) {
if (this.z == null) {
z = new RZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(),
- Integer.parseInt(getProperty("zeppelin.r.maxResult", "1000")));
+ Integer.parseInt(getProperty("zeppelin.R.maxResult", "1000")));
}
}
this.renderOptions = getProperty("zeppelin.R.render.options",
diff --git a/rlang/src/main/resources/R/zeppelin_isparkr.R b/rlang/src/main/resources/R/zeppelin_isparkr.R
index f279ee1..7c67f14 100644
--- a/rlang/src/main/resources/R/zeppelin_isparkr.R
+++ b/rlang/src/main/resources/R/zeppelin_isparkr.R
@@ -37,6 +37,7 @@ if (is.null(authSecret) || authSecret == '') {
# scStartTime is needed by R/pkg/R/sparkR.R
assign(".scStartTime", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)
+assign(".maxRows", as.integer())
if (isSparkSupported == "true") {
# setup spark env
@@ -51,7 +52,7 @@ if (isSparkSupported == "true") {
assign("sqlContext", get(".sqlc", envir = SparkR:::.sparkREnv), envir = .GlobalEnv)
assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getZeppelinContext"), envir = .GlobalEnv)
} else {
- assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.r.RInterpreter", "getRZeppelinContext"), envir = .GlobalEnv)
+ assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.r.IRInterpreter", "getRZeppelinContext"), envir = .GlobalEnv)
}
z.put <- function(name, object) {
@@ -113,4 +114,21 @@ z.angularBind <- function(name, value) {
z.angularUnbind <- function(name, value) {
SparkR:::callJMethod(.zeppelinContext, "angularUnbind", name)
+}
+
+z.show <- function(data, maxRows=SparkR:::callJMethod(.zeppelinContext, "getMaxResult")) {
+ if (is.data.frame(data)) {
+ resultString = c(paste(colnames(data), collapse ="\t"))
+ for (row in 1: min(nrow(data), maxRows)) {
+ rowString <- paste(data[row,], collapse ="\t")
+ resultString = c(resultString, rowString)
+ }
+ a=paste(resultString, collapse="\n")
+ cat("\n%table ", a, "\n\n%text ", sep="")
+ if (nrow(data) > maxRows) {
+ cat("\n%html <font color=red>Results are limited by ", maxRows, " rows.</font>", "\n%text ", sep="")
+ }
+ } else {
+ cat(data)
+ }
}
\ No newline at end of file
diff --git a/rlang/src/main/resources/interpreter-setting.json b/rlang/src/main/resources/interpreter-setting.json
index f0a9677..654c1ee 100644
--- a/rlang/src/main/resources/interpreter-setting.json
+++ b/rlang/src/main/resources/interpreter-setting.json
@@ -18,6 +18,13 @@
"description": "R binary executable path",
"type": "string"
},
+ "zeppelin.R.maxResult": {
+ "envName": null,
+ "propertyName": "zeppelin.R.maxResult",
+ "defaultValue": "1000",
+ "description": "Max number of dataframe rows to display.",
+ "type": "number"
+ },
"zeppelin.R.image.width": {
"envName": "ZEPPELIN_R_IMAGE_WIDTH",
"propertyName": "zeppelin.R.image.width",
diff --git a/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java b/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
index 44a45e4..bd8a3b5 100644
--- a/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
+++ b/rlang/src/test/java/org/apache/zeppelin/r/IRInterpreterTest.java
@@ -19,12 +19,21 @@ package org.apache.zeppelin.r;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
+import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterOutput;
+import org.apache.zeppelin.interpreter.InterpreterResult;
+import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.jupyter.IRKernelTest;
+import org.junit.Test;
+import java.io.IOException;
import java.util.HashMap;
+import java.util.List;
import java.util.Properties;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
public class IRInterpreterTest extends IRKernelTest {
@@ -43,4 +52,42 @@ public class IRInterpreterTest extends IRKernelTest {
.build();
return context;
}
+
+ @Test
+ public void testZShow() throws InterpreterException, IOException {
+ InterpreterContext context = getInterpreterContext();
+ InterpreterResult result = interpreter.interpret(
+ "df=data.frame(country=c(\"US\", \"GB\", \"BR\"),\n" +
+ "val1=c(10,13,14),\n" +
+ "val2=c(23,12,32))", context);
+ assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+
+ context = getInterpreterContext();
+ result = interpreter.interpret("z.show(df)", context);
+ assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+ List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
+ assertEquals(1, resultMessages.size());
+ assertEquals(resultMessages.toString(),
+ InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
+ assertEquals("country\tval1\tval2\n" +
+ "3\t10\t23\n" +
+ "2\t13\t12\n" +
+ "1\t14\t32\n",
+ resultMessages.get(0).getData());
+
+ context = getInterpreterContext();
+ result = interpreter.interpret("z.show(df, maxRows=1)", context);
+ assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+ resultMessages = context.out.toInterpreterResultMessage();
+ assertEquals(2, resultMessages.size());
+ assertEquals(resultMessages.toString(),
+ InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
+ assertEquals("country\tval1\tval2\n" +
+ "3\t10\t23\n",
+ resultMessages.get(0).getData());
+ assertEquals(resultMessages.toString(),
+ InterpreterResult.Type.HTML, resultMessages.get(1).getType());
+ assertEquals("<font color=red>Results are limited by 1 rows.</font>\n",
+ resultMessages.get(1).getData());
+ }
}
diff --git a/rlang/src/test/resources/log4j.properties b/rlang/src/test/resources/log4j.properties
index ed2672f..fa19880 100644
--- a/rlang/src/test/resources/log4j.properties
+++ b/rlang/src/test/resources/log4j.properties
@@ -24,4 +24,4 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
#log4j.logger.org.apache.zeppelin.interpreter.util=DEBUG
-#log4j.logger.org.apache.zeppelin.jupyter=DEBUG
+log4j.logger.org.apache.zeppelin.jupyter=DEBUG
diff --git a/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java b/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
index 195bfcb..e07ec62 100644
--- a/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
+++ b/zeppelin-interpreter/src/test/java/org/apache/zeppelin/interpreter/InterpreterOutputTest.java
@@ -164,6 +164,16 @@ public class InterpreterOutputTest implements InterpreterOutputListener {
}
@Test
+ public void testMixTableText() throws IOException {
+ out.write("%table col1\tcol2\n\n%text val1".getBytes());
+ assertEquals(InterpreterResult.Type.TABLE, out.getOutputAt(0).getType());
+ assertEquals("col1\tcol2\n", new String(out.getOutputAt(0).toByteArray()));
+ out.flush();
+ assertEquals(InterpreterResult.Type.TEXT, out.getOutputAt(1).getType());
+ assertEquals("val1", new String(out.getOutputAt(1).toByteArray()));
+ }
+
+ @Test
public void testTruncate() throws IOException {
// output is truncated after the new line
InterpreterOutput.LIMIT = 3;