You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by ey...@apache.org on 2019/04/21 10:18:13 UTC
[datafu] branch spark-tmp updated: Russell's review changes
This is an automated email from the ASF dual-hosted git repository.
eyal pushed a commit to branch spark-tmp
in repository https://gitbox.apache.org/repos/asf/datafu.git
The following commit(s) were added to refs/heads/spark-tmp by this push:
new c9f94f6 Russell's review changes
c9f94f6 is described below
commit c9f94f66813f76995f499efbd29cf34bbfc3eda6
Author: oraviv <or...@paypal.com>
AuthorDate: Wed Apr 17 09:04:54 2019 +0300
Russell's review changes
Signed-off-by: Eyal Allweil <ey...@apache.org>
---
datafu-spark/README.md | 20 ++++++++++----------
.../resources/pyspark_utils/init_spark_context.py | 2 +-
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/datafu-spark/README.md b/datafu-spark/README.md
index ebad8e1..8bbba09 100644
--- a/datafu-spark/README.md
+++ b/datafu-spark/README.md
@@ -22,18 +22,18 @@ from pyspark_utils.df_utils import PySparkDFUtils
df_utils = PySparkDFUtils()
df_people = sqlContext.createDataFrame([
-... ("a", "Alice", 34),
-... ("a", "Sara", 33),
-... ("b", "Bob", 36),
-... ("b", "Charlie", 30),
-... ("c", "David", 29),
-... ("c", "Esther", 32),
-... ("c", "Fanny", 36),
-... ("c", "Zoey", 36)],
-... ["id", "name", "age"])
+ ("a", "Alice", 34),
+ ("a", "Sara", 33),
+ ("b", "Bob", 36),
+ ("b", "Charlie", 30),
+ ("c", "David", 29),
+ ("c", "Esther", 32),
+ ("c", "Fanny", 36),
+ ("c", "Zoey", 36)],
+ ["id", "name", "age"])
func_dedup_res = df_utils.dedup(dataFrame=df_people, groupCol=df_people.id,
-... orderCols=[df_people.age.desc(), df_people.name.desc()])
+ orderCols=[df_people.age.desc(), df_people.name.desc()])
func_dedup_res.registerTempTable("dedup")
diff --git a/datafu-spark/src/main/resources/pyspark_utils/init_spark_context.py b/datafu-spark/src/main/resources/pyspark_utils/init_spark_context.py
index d879d03..cceef01 100644
--- a/datafu-spark/src/main/resources/pyspark_utils/init_spark_context.py
+++ b/datafu-spark/src/main/resources/pyspark_utils/init_spark_context.py
@@ -2,4 +2,4 @@
from pyspark_utils.bridge_utils import get_contexts
sc, sqlContext, spark = get_contexts()
-print "initiated contexts"
+print("initiated contexts")