You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by bz...@apache.org on 2016/11/09 06:41:33 UTC

zeppelin git commit: [ZEPPELIN-1358] Add support to display Pandas DataFrame index using z.show()

Repository: zeppelin
Updated Branches:
  refs/heads/master a4f9f6bad -> dbd81bf85


[ZEPPELIN-1358] Add support to display Pandas DataFrame index using z.show()

### What is this PR for?

Add support for optionally displaying the Pandas DataFrame index using z.show(df, show_index=True) in the Python interpreter. By default, the DataFrame index is not displayed.
### What type of PR is it?

Improvement
### What is the Jira issue?

[ZEPPELIN-1358](https://issues.apache.org/jira/browse/ZEPPELIN-1358)
### How should this be tested?

```
mvn -Dpython.test.exclude='' test -pl python -am
```
### Screenshots (if appropriate)

![screenshot from 2016-10-09 18-18-44](https://cloud.githubusercontent.com/assets/7907284/19223745/b88d07c6-8e4d-11e6-9592-c66f2e4a5ed2.png)
### Questions:
- Do the license files need an update? no
- Are there breaking changes for older versions? no
- Does this need documentation? no

Author: Paul Bustios <pb...@gmail.com>

Closes #1378 from bustios/ZEPPELIN-1358 and squashes the following commits:

7842f71 [Paul Bustios] Add param to make the index be shown optionally
6767b40 [Paul Bustios] Add support to display Pandas DataFrame index using z.show() and modifies test.


Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/dbd81bf8
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/dbd81bf8
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/dbd81bf8

Branch: refs/heads/master
Commit: dbd81bf85610ce8dff4e9148e63f67437a2fdc41
Parents: a4f9f6ba
Author: Paul Bustios <pb...@gmail.com>
Authored: Sun Oct 9 17:38:42 2016 -0300
Committer: Alexander Bezzubov <bz...@apache.org>
Committed: Wed Nov 9 15:41:14 2016 +0900

----------------------------------------------------------------------
 python/src/main/resources/bootstrap.py                   | 11 +++++++++--
 .../zeppelin/python/PythonInterpreterPandasSqlTest.java  |  9 ++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zeppelin/blob/dbd81bf8/python/src/main/resources/bootstrap.py
----------------------------------------------------------------------
diff --git a/python/src/main/resources/bootstrap.py b/python/src/main/resources/bootstrap.py
index fc58693..dbe5e35 100644
--- a/python/src/main/resources/bootstrap.py
+++ b/python/src/main/resources/bootstrap.py
@@ -139,11 +139,14 @@ class PyZeppelinContext(object):
         elif hasattr(p, '__call__'):
             p() #error reporting
     
-    def show_dataframe(self, df, **kwargs):
+    def show_dataframe(self, df, show_index=False, **kwargs):
         """Pretty prints DF using Table Display System
         """
         limit = len(df) > self.max_result
         header_buf = StringIO("")
+        if show_index:
+            idx_name = str(df.index.name) if df.index.name is not None else ""
+            header_buf.write(idx_name + "\t")
         header_buf.write(str(df.columns[0]))
         for col in df.columns[1:]:
             header_buf.write("\t")
@@ -152,7 +155,11 @@ class PyZeppelinContext(object):
         
         body_buf = StringIO("")
         rows = df.head(self.max_result).values if limit else df.values
-        for row in rows:
+        index = df.index.values
+        for idx, row in zip(index, rows):
+            if show_index:
+                body_buf.write("%html <strong>{}</strong>".format(idx))
+                body_buf.write("\t")
             body_buf.write(str(row[0]))
             for cell in row[1:]:
                 body_buf.write("\t")

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/dbd81bf8/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java
----------------------------------------------------------------------
diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java
index f953856..9154394 100644
--- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java
+++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java
@@ -159,17 +159,20 @@ public class PythonInterpreterPandasSqlTest {
     ret = python.interpret("import pandas as pd", context);
     ret = python.interpret("import numpy as np", context);
 
-    // given a Pandas DataFrame with non-text data
+    // given a Pandas DataFrame with an index and non-text data
+    ret = python.interpret("index = pd.Index([10, 11, 12, 13], name='index_name')", context);
     ret = python.interpret("d1 = {1 : [np.nan, 1, 2, 3], 'two' : [3., 4., 5., 6.7]}", context);
-    ret = python.interpret("df1 = pd.DataFrame(d1)", context);
+    ret = python.interpret("df1 = pd.DataFrame(d1, index=index)", context);
     assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
 
     // when
-    ret = python.interpret("z.show(df1)", context);
+    ret = python.interpret("z.show(df1, show_index=True)", context);
 
     // then
     assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
     assertEquals(ret.message(), Type.TABLE, ret.type());
+    assertTrue(ret.message().indexOf("index_name") == 0);
+    assertTrue(ret.message().indexOf("13") > 0);
     assertTrue(ret.message().indexOf("nan") > 0);
     assertTrue(ret.message().indexOf("6.7") > 0);
   }