You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/05/06 17:51:10 UTC
spark git commit: [SQL] JavaDoc update for various DataFrame
functions.
Repository: spark
Updated Branches:
refs/heads/master 32cdc815c -> 322e7e7f6
[SQL] JavaDoc update for various DataFrame functions.
Author: Reynold Xin <rx...@databricks.com>
Closes #5935 from rxin/df-doc1 and squashes the following commits:
aaeaadb [Reynold Xin] [SQL] JavaDoc update for various DataFrame functions.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/322e7e7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/322e7e7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/322e7e7f
Branch: refs/heads/master
Commit: 322e7e7f689947aef29909572ee0c0e110ea23b8
Parents: 32cdc81
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed May 6 08:50:56 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed May 6 08:50:56 2015 -0700
----------------------------------------------------------------------
.../main/scala/org/apache/spark/sql/Column.scala | 4 ++--
.../scala/org/apache/spark/sql/DataFrame.scala | 16 ++++++++--------
.../spark/sql/DataFrameStatFunctions.scala | 19 +++++++++++++++----
.../scala/org/apache/spark/sql/functions.scala | 14 +++++++-------
4 files changed, 32 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/322e7e7f/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index c0503bf..8eb632d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -84,14 +84,14 @@ class Column(protected[sql] val expr: Expression) extends Logging {
/**
* Inversion of boolean expression, i.e. NOT.
- * {{
+ * {{{
* // Scala: select rows that are not active (isActive === false)
* df.filter( !df("isActive") )
*
* // Java:
* import static org.apache.spark.sql.functions.*;
* df.filter( not(df.col("isActive")) );
- * }}
+ * }}}
*
* @group expr_ops
*/
http://git-wip-us.apache.org/repos/asf/spark/blob/322e7e7f/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index aceb7a9..9d2cd7a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -681,11 +681,11 @@ class DataFrame private[sql](
/**
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
- * {{
+ * {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg("age" -> "max", "salary" -> "avg")
* df.groupBy().agg("age" -> "max", "salary" -> "avg")
- * }}
+ * }}}
* @group dfops
*/
def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
@@ -694,33 +694,33 @@ class DataFrame private[sql](
/**
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
- * {{
+ * {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(Map("age" -> "max", "salary" -> "avg"))
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
- * }}
+ * }}}
* @group dfops
*/
def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
/**
* (Java-specific) Aggregates on the entire [[DataFrame]] without groups.
- * {{
+ * {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(Map("age" -> "max", "salary" -> "avg"))
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
- * }}
+ * }}}
* @group dfops
*/
def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
/**
* Aggregates on the entire [[DataFrame]] without groups.
- * {{
+ * {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(max($"age"), avg($"salary"))
* df.groupBy().agg(max($"age"), avg($"salary"))
- * }}
+ * }}}
* @group dfops
*/
@scala.annotation.varargs
http://git-wip-us.apache.org/repos/asf/spark/blob/322e7e7f/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index cb88dea..a1e7447 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -37,7 +37,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
StatFunctions.calculateCov(df, Seq(col1, col2))
}
- /*
+ /**
* Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson
* Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in
* MLlib's Statistics.
@@ -75,7 +75,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* each row.
* @param col2 The name of the second column. Distinct items will make the column names
* of the DataFrame.
- * @return A Local DataFrame containing the table
+ * @return A DataFrame containing the contingency table.
*/
def crosstab(col1: String, col2: String): DataFrame = {
StatFunctions.crossTabulate(df, col1, col2)
@@ -110,14 +110,25 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
}
/**
- * Python friendly implementation for `freqItems`
+ * (Scala-specific) Finds frequent items for columns, possibly with false positives, using the
+ * frequent element count algorithm proposed by Karp, Schenker, and Papadimitriou; see
+ * [[http://dx.doi.org/10.1145/762471.762473 the original paper]].
+ *
+ * @param cols the names of the columns to search frequent items in.
+ * @return A Local DataFrame with the Array of frequent items for each column.
*/
def freqItems(cols: Seq[String], support: Double): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, support)
}
/**
- * Python friendly implementation for `freqItems` with a default `support` of 1%.
+ * (Scala-specific) Finds frequent items for columns, possibly with false positives, using the
+ * frequent element count algorithm proposed by Karp, Schenker, and Papadimitriou; see
+ * [[http://dx.doi.org/10.1145/762471.762473 the original paper]].
+ * Uses a default `support` of 1%.
+ *
+ * @param cols the names of the columns to search frequent items in.
+ * @return A Local DataFrame with the Array of frequent items for each column.
*/
def freqItems(cols: Seq[String]): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, 0.01)
http://git-wip-us.apache.org/repos/asf/spark/blob/322e7e7f/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f2bb453..830b501 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -22,7 +22,7 @@ import scala.reflect.runtime.universe.{TypeTag, typeTag}
import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, Star}
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.mathfuncs._
import org.apache.spark.sql.types._
@@ -86,10 +86,10 @@ object functions {
/**
* Returns a sort expression based on ascending order of the column.
- * {{
+ * {{{
* // Sort by dept in ascending order, and then age in descending order.
* df.sort(asc("dept"), desc("age"))
- * }}
+ * }}}
*
* @group sort_funcs
*/
@@ -97,10 +97,10 @@ object functions {
/**
* Returns a sort expression based on the descending order of the column.
- * {{
+ * {{{
* // Sort by dept in ascending order, and then age in descending order.
* df.sort(asc("dept"), desc("age"))
- * }}
+ * }}}
*
* @group sort_funcs
*/
@@ -353,13 +353,13 @@ object functions {
/**
* Inversion of boolean expression, i.e. NOT.
- * {{
+ * {{{
* // Scala: select rows that are not active (isActive === false)
* df.filter( !df("isActive") )
*
* // Java:
* df.filter( not(df.col("isActive")) );
- * }}
+ * }}}
*
* @group normal_funcs
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org