You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/04/07 18:58:21 UTC
[17/50] [abbrv] incubator-systemml git commit: [SYSTEMML-1181] Change
sqlContext to spark in MLContext docs
[SYSTEMML-1181] Change sqlContext to spark in MLContext docs
The variable sqlContext is no longer available by default in the Spark shell;
instead, spark should be used to create DataFrames. Where methods expect an
instance of SQLContext, arguments are replaced with spark.sqlContext.
Closes #371.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/61f25f2b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/61f25f2b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/61f25f2b
Branch: refs/heads/gh-pages
Commit: 61f25f2b682446249ceb94c94da4e5b546cb3eec
Parents: b9d878c
Author: Felix Schueler <fe...@ibm.com>
Authored: Thu Feb 2 16:08:08 2017 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Thu Feb 2 16:08:08 2017 -0800
----------------------------------------------------------------------
spark-mlcontext-programming-guide.md | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/61f25f2b/spark-mlcontext-programming-guide.md
----------------------------------------------------------------------
diff --git a/spark-mlcontext-programming-guide.md b/spark-mlcontext-programming-guide.md
index dcaa125..8c0a79f 100644
--- a/spark-mlcontext-programming-guide.md
+++ b/spark-mlcontext-programming-guide.md
@@ -141,7 +141,7 @@ val numRows = 10000
val numCols = 1000
val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) }
val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
-val df = sqlContext.createDataFrame(data, schema)
+val df = spark.createDataFrame(data, schema)
{% endhighlight %}
</div>
@@ -167,7 +167,7 @@ data: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[1] a
scala> val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
schema: org.apache.spark.sql.types.StructType = StructType(StructField(C0,DoubleType,true), StructField(C1,DoubleType,true), StructField(C2,DoubleType,true), StructField(C3,DoubleType,true), StructField(C4,DoubleType,true), StructField(C5,DoubleType,true), StructField(C6,DoubleType,true), StructField(C7,DoubleType,true), StructField(C8,DoubleType,true), StructField(C9,DoubleType,true), StructField(C10,DoubleType,true), StructField(C11,DoubleType,true), StructField(C12,DoubleType,true), StructField(C13,DoubleType,true), StructField(C14,DoubleType,true), StructField(C15,DoubleType,true), StructField(C16,DoubleType,true), StructField(C17,DoubleType,true), StructField(C18,DoubleType,true), StructField(C19,DoubleType,true), StructField(C20,DoubleType,true), StructField(C21,DoubleType,true), ...
-scala> val df = sqlContext.createDataFrame(data, schema)
+scala> val df = spark.createDataFrame(data, schema)
df: org.apache.spark.sql.DataFrame = [C0: double, C1: double, C2: double, C3: double, C4: double, C5: double, C6: double, C7: double, C8: double, C9: double, C10: double, C11: double, C12: double, C13: double, C14: double, C15: double, C16: double, C17: double, C18: double, C19: double, C20: double, C21: double, C22: double, C23: double, C24: double, C25: double, C26: double, C27: double, C28: double, C29: double, C30: double, C31: double, C32: double, C33: double, C34: double, C35: double, C36: double, C37: double, C38: double, C39: double, C40: double, C41: double, C42: double, C43: double, C44: double, C45: double, C46: double, C47: double, C48: double, C49: double, C50: double, C51: double, C52: double, C53: double, C54: double, C55: double, C56: double, C57: double, C58: double, C5...
{% endhighlight %}
@@ -1540,7 +1540,7 @@ val numRows = 10000
val numCols = 1000
val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) }
val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
-val df = sqlContext.createDataFrame(data, schema)
+val df = spark.createDataFrame(data, schema)
val mm = new MatrixMetadata(numRows, numCols)
val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut")
val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut")
@@ -1561,7 +1561,7 @@ val numRows = 10000
val numCols = 1000
val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) }
val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
-val df = sqlContext.createDataFrame(data, schema)
+val df = spark.createDataFrame(data, schema)
val mm = new MatrixMetadata(numRows, numCols)
val bbm = new BinaryBlockMatrix(df, mm)
val minMaxMeanScript = dml(minMaxMean).in("Xin", bbm).out("minOut", "maxOut", "meanOut")
@@ -1852,7 +1852,7 @@ data: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[1] a
scala> val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
schema: org.apache.spark.sql.types.StructType = StructType(StructField(C0,DoubleType,true), StructField(C1,DoubleType,true), StructField(C2,DoubleType,true), StructField(C3,DoubleType,true), StructField(C4,DoubleType,true), StructField(C5,DoubleType,true), StructField(C6,DoubleType,true), StructField(C7,DoubleType,true), StructField(C8,DoubleType,true), StructField(C9,DoubleType,true), StructField(C10,DoubleType,true), StructField(C11,DoubleType,true), StructField(C12,DoubleType,true), StructField(C13,DoubleType,true), StructField(C14,DoubleType,true), StructField(C15,DoubleType,true), StructField(C16,DoubleType,true), StructField(C17,DoubleType,true), StructField(C18,DoubleType,true), StructField(C19,DoubleType,true), StructField(C20,DoubleType,true), StructField(C21,DoubleType,true), ...
-scala> val df = sqlContext.createDataFrame(data, schema)
+scala> val df = spark.createDataFrame(data, schema)
df: org.apache.spark.sql.DataFrame = [C0: double, C1: double, C2: double, C3: double, C4: double, C5: double, C6: double, C7: double, C8: double, C9: double, C10: double, C11: double, C12: double, C13: double, C14: double, C15: double, C16: double, C17: double, C18: double, C19: double, C20: double, C21: double, C22: double, C23: double, C24: double, C25: double, C26: double, C27: double, C28: double, C29: double, C30: double, C31: double, C32: double, C33: double, C34: double, C35: double, C36: double, C37: double, C38: double, C39: double, C40: double, C41: double, C42: double, C43: double, C44: double, C45: double, C46: double, C47: double, C48: double, C49: double, C50: double, C51: double, C52: double, C53: double, C54: double, C55: double, C56: double, C57: double, C58: double, C5...
{% endhighlight %}
@@ -1867,7 +1867,7 @@ val numRows = 100000
val numCols = 1000
val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) }
val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } )
-val df = sqlContext.createDataFrame(data, schema)
+val df = spark.createDataFrame(data, schema)
{% endhighlight %}
</div>
@@ -1889,7 +1889,7 @@ scala> import org.apache.sysml.api.MLOutput
import org.apache.sysml.api.MLOutput
scala> def getScalar(outputs: MLOutput, symbol: String): Any =
- | outputs.getDF(sqlContext, symbol).first()(1)
+ | outputs.getDF(spark.sqlContext, symbol).first()(1)
getScalar: (outputs: org.apache.sysml.api.MLOutput, symbol: String)Any
scala> def getScalarDouble(outputs: MLOutput, symbol: String): Double =
@@ -1907,7 +1907,7 @@ getScalarInt: (outputs: org.apache.sysml.api.MLOutput, symbol: String)Int
{% highlight scala %}
import org.apache.sysml.api.MLOutput
def getScalar(outputs: MLOutput, symbol: String): Any =
-outputs.getDF(sqlContext, symbol).first()(1)
+outputs.getDF(spark.sqlContext, symbol).first()(1)
def getScalarDouble(outputs: MLOutput, symbol: String): Double =
getScalar(outputs, symbol).asInstanceOf[Double]
def getScalarInt(outputs: MLOutput, symbol: String): Int =
@@ -2264,7 +2264,7 @@ The Spark `LinearDataGenerator` is used to generate test data for the Spark ML a
{% highlight scala %}
// Generate data
import org.apache.spark.mllib.util.LinearDataGenerator
-import sqlContext.implicits._
+import spark.implicits._
val numRows = 10000
val numCols = 1000
@@ -2549,7 +2549,7 @@ This cell contains helper methods to return `Double` and `Int` values from outpu
import org.apache.sysml.api.MLOutput
def getScalar(outputs: MLOutput, symbol: String): Any =
- outputs.getDF(sqlContext, symbol).first()(1)
+ outputs.getDF(spark.sqlContext, symbol).first()(1)
def getScalarDouble(outputs: MLOutput, symbol: String): Double =
getScalar(outputs, symbol).asInstanceOf[Double]
@@ -2638,7 +2638,7 @@ val outputs = ml.executeScript(linearReg)
val trainingTime = (System.currentTimeMillis() - start).toDouble / 1000.0
// Get outputs
-val B = outputs.getDF(sqlContext, "beta_out").sort("ID").drop("ID")
+val B = outputs.getDF(spark.sqlContext, "beta_out").sort("ID").drop("ID")
val r2 = getScalarDouble(outputs, "R2")
val iters = getScalarInt(outputs, "totalIters")
val trainingTimePerIter = trainingTime / iters
@@ -2815,7 +2815,7 @@ outputs = ml.executeScript(pnmf, {"X": X_train, "maxiter": 100, "rank": 10}, ["W
{% highlight python %}
# Plot training loss over time
-losses = outputs.getDF(sqlContext, "losses")
+losses = outputs.getDF(spark.sqlContext, "losses")
xy = losses.sort(losses.ID).map(lambda r: (r[0], r[1])).collect()
x, y = zip(*xy)
plt.plot(x, y)